diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 0f00688b3598..3bcf6267d6fa 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -1,6229 +1,6416 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_IDMAP #include #include #endif /* HAVE_IDMAP */ #include "zfs_iter.h" #include "zfs_util.h" #include "zfs_comutil.h" libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); static int zfs_do_destroy(int argc, char **argv); static int zfs_do_get(int argc, char **argv); static int zfs_do_inherit(int argc, char **argv); static int zfs_do_list(int argc, char **argv); static int zfs_do_mount(int argc, char **argv); static int zfs_do_rename(int argc, char **argv); static int zfs_do_rollback(int argc, char **argv); static int zfs_do_set(int argc, char **argv); static int zfs_do_upgrade(int argc, char **argv); static int zfs_do_snapshot(int argc, char **argv); static int zfs_do_unmount(int argc, char **argv); static int zfs_do_share(int argc, char **argv); static int zfs_do_unshare(int argc, char **argv); static int zfs_do_send(int argc, char **argv); static int zfs_do_receive(int argc, char **argv); static int zfs_do_promote(int argc, char **argv); static int zfs_do_userspace(int argc, char **argv); static int zfs_do_allow(int argc, char **argv); static int zfs_do_unallow(int argc, char **argv); static int zfs_do_hold(int argc, char **argv); static int zfs_do_holds(int argc, char **argv); static int zfs_do_release(int argc, char **argv); static int zfs_do_diff(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. */ #ifdef DEBUG const char * _umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } const char * _umem_logging_init(void) { return ("fail,contents"); /* $UMEM_LOGGING setting */ } #endif typedef enum { HELP_CLONE, HELP_CREATE, HELP_DESTROY, HELP_GET, HELP_INHERIT, HELP_UPGRADE, HELP_LIST, HELP_MOUNT, HELP_PROMOTE, HELP_RECEIVE, HELP_RENAME, HELP_ROLLBACK, HELP_SEND, HELP_SET, HELP_SHARE, HELP_SNAPSHOT, HELP_UNMOUNT, HELP_UNSHARE, HELP_ALLOW, HELP_UNALLOW, HELP_USERSPACE, HELP_GROUPSPACE, HELP_HOLD, HELP_HOLDS, HELP_RELEASE, - HELP_DIFF + HELP_DIFF, } zfs_help_t; typedef struct zfs_command { const char *name; int (*func)(int argc, char **argv); zfs_help_t usage; } zfs_command_t; /* * Master command table. Each ZFS command has a name, associated function, and * usage message. The usage messages need to be internationalized, so we have * to have a function to return the usage message based on a command index. * * These commands are organized according to how they are displayed in the usage * message. An empty command (one with a NULL name) indicates an empty line in * the generic usage message. */ static zfs_command_t command_table[] = { { "create", zfs_do_create, HELP_CREATE }, { "destroy", zfs_do_destroy, HELP_DESTROY }, { NULL }, { "snapshot", zfs_do_snapshot, HELP_SNAPSHOT }, { "rollback", zfs_do_rollback, HELP_ROLLBACK }, { "clone", zfs_do_clone, HELP_CLONE }, { "promote", zfs_do_promote, HELP_PROMOTE }, { "rename", zfs_do_rename, HELP_RENAME }, { NULL }, { "list", zfs_do_list, HELP_LIST }, { NULL }, { "set", zfs_do_set, HELP_SET }, { "get", zfs_do_get, HELP_GET }, { "inherit", zfs_do_inherit, HELP_INHERIT }, { "upgrade", zfs_do_upgrade, HELP_UPGRADE }, { "userspace", zfs_do_userspace, HELP_USERSPACE }, { "groupspace", zfs_do_userspace, HELP_GROUPSPACE }, { NULL }, { "mount", zfs_do_mount, HELP_MOUNT }, { "unmount", zfs_do_unmount, HELP_UNMOUNT }, { "share", zfs_do_share, HELP_SHARE }, { "unshare", zfs_do_unshare, HELP_UNSHARE }, { NULL }, { "send", zfs_do_send, HELP_SEND }, { "receive", zfs_do_receive, HELP_RECEIVE }, { NULL }, { "allow", zfs_do_allow, HELP_ALLOW }, { NULL }, { "unallow", zfs_do_unallow, HELP_UNALLOW }, { NULL }, { "hold", zfs_do_hold, HELP_HOLD }, { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) zfs_command_t *current_command; static const char * get_usage(zfs_help_t idx) { switch (idx) { case HELP_CLONE: return (gettext("\tclone [-p] [-o property=value] ... " " \n")); case HELP_CREATE: return (gettext("\tcreate [-p] [-o property=value] ... " "\n" "\tcreate [-ps] [-b blocksize] [-o property=value] ... " "-V \n")); case HELP_DESTROY: - return (gettext("\tdestroy [-rRf] \n" - "\tdestroy [-rRd] \n")); + return (gettext("\tdestroy [-fnpRrv] \n" + "\tdestroy [-dnpRrv] " + "@[%][,...]\n")); case HELP_GET: return (gettext("\tget [-rHp] [-d max] " "[-o \"all\" | field[,...]] [-s source[,...]]\n" "\t <\"all\" | property[,...]> " "[filesystem|volume|snapshot] ...\n")); case HELP_INHERIT: return (gettext("\tinherit [-rS] " " ...\n")); case HELP_UPGRADE: return (gettext("\tupgrade [-v]\n" "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); case HELP_LIST: return (gettext("\tlist [-rH][-d max] " "[-o property[,...]] [-t type[,...]] [-s property] ...\n" "\t [-S property] ... " "[filesystem|volume|snapshot|snap] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" "\tmount [-vO] [-o opts] <-a | filesystem>\n")); case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: return (gettext("\treceive [-vnFu] \n" "\treceive [-vnFu] [-d | -e] \n")); case HELP_RENAME: return (gettext("\trename " "\n" "\trename -p \n" "\trename -r ")); case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: - return (gettext("\tsend [-vRDp] [-[iI] snapshot] \n")); + return (gettext("\tsend [-DnPpRrv] [-[iI] snapshot] " + "\n")); case HELP_SET: return (gettext("\tset " " ...\n")); case HELP_SHARE: return (gettext("\tshare <-a | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot|snap [-r] [-o property=value] ... " "\n")); case HELP_UNMOUNT: return (gettext("\tunmount [-f] " "<-a | filesystem|mountpoint>\n")); case HELP_UNSHARE: return (gettext("\tunshare " "<-a | filesystem|mountpoint>\n")); case HELP_ALLOW: return (gettext("\tallow \n" "\tallow [-ldug] " "<\"everyone\"|user|group>[,...] [,...]\n" "\t \n" "\tallow [-ld] -e [,...] " "\n" "\tallow -c [,...] \n" "\tallow -s @setname [,...] " "\n")); case HELP_UNALLOW: return (gettext("\tunallow [-rldug] " "<\"everyone\"|user|group>[,...]\n" "\t [[,...]] \n" "\tunallow [-rld] -e [[,...]] " "\n" "\tunallow [-r] -c [[,...]] " "\n" "\tunallow [-r] -s @setname [[,...]] " "\n")); case HELP_USERSPACE: return (gettext("\tuserspace [-hniHp] [-o field[,...]] " "[-sS field] ... [-t type[,...]]\n" "\t \n")); case HELP_GROUPSPACE: return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] " "[-sS field] ... [-t type[,...]]\n" "\t \n")); case HELP_HOLD: return (gettext("\thold [-r] ...\n")); case HELP_HOLDS: return (gettext("\tholds [-r] ...\n")); case HELP_RELEASE: return (gettext("\trelease [-r] ...\n")); case HELP_DIFF: return (gettext("\tdiff [-FHt] " "[snapshot|filesystem]\n")); } abort(); /* NOTREACHED */ } void nomem(void) { (void) fprintf(stderr, gettext("internal error: out of memory\n")); exit(1); } /* * Utility function to guarantee malloc() success. */ void * safe_malloc(size_t size) { void *data; if ((data = calloc(1, size)) == NULL) nomem(); return (data); } static char * safe_strdup(char *str) { char *dupstr = strdup(str); if (dupstr == NULL) nomem(); return (dupstr); } /* * Callback routine that will print out information for each of * the properties. */ static int usage_prop_cb(int prop, void *cb) { FILE *fp = cb; (void) fprintf(fp, "\t%-15s ", zfs_prop_to_name(prop)); if (zfs_prop_readonly(prop)) (void) fprintf(fp, " NO "); else (void) fprintf(fp, "YES "); if (zfs_prop_inheritable(prop)) (void) fprintf(fp, " YES "); else (void) fprintf(fp, " NO "); if (zfs_prop_values(prop) == NULL) (void) fprintf(fp, "-\n"); else (void) fprintf(fp, "%s\n", zfs_prop_values(prop)); return (ZPROP_CONT); } /* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display * a complete usage message. */ static void usage(boolean_t requested) { int i; boolean_t show_properties = B_FALSE; FILE *fp = requested ? stdout : stderr; if (current_command == NULL) { (void) fprintf(fp, gettext("usage: zfs command args ...\n")); (void) fprintf(fp, gettext("where 'command' is one of the following:\n\n")); for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) (void) fprintf(fp, "\n"); else (void) fprintf(fp, "%s", get_usage(command_table[i].usage)); } (void) fprintf(fp, gettext("\nEach dataset is of the form: " "pool/[dataset/]*dataset[@name]\n")); } else { (void) fprintf(fp, gettext("usage:\n")); (void) fprintf(fp, "%s", get_usage(current_command->usage)); } if (current_command != NULL && (strcmp(current_command->name, "set") == 0 || strcmp(current_command->name, "get") == 0 || strcmp(current_command->name, "inherit") == 0 || strcmp(current_command->name, "list") == 0)) show_properties = B_TRUE; if (show_properties) { (void) fprintf(fp, gettext("\nThe following properties are supported:\n")); (void) fprintf(fp, "\n\t%-14s %s %s %s\n\n", "PROPERTY", "EDIT", "INHERIT", "VALUES"); /* Iterate over all properties */ (void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE, ZFS_TYPE_DATASET); (void) fprintf(fp, "\t%-15s ", "userused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "groupused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "userquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "groupquota@..."); (void) fprintf(fp, "YES NO | none\n"); + (void) fprintf(fp, "\t%-15s ", "written@"); + (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, gettext("\nSizes are specified in bytes " "with standard units such as K, M, G, etc.\n")); (void) fprintf(fp, gettext("\nUser-defined properties can " "be specified by using a name containing a colon (:).\n")); (void) fprintf(fp, gettext("\nThe {user|group}{used|quota}@ " "properties must be appended with\n" "a user or group specifier of one of these forms:\n" " POSIX name (eg: \"matt\")\n" " POSIX id (eg: \"126829\")\n" " SMB name@domain (eg: \"matt@sun\")\n" " SMB SID (eg: \"S-1-234-567-89\")\n")); } else { (void) fprintf(fp, gettext("\nFor the property list, run: %s\n"), "zfs set|get"); (void) fprintf(fp, gettext("\nFor the delegated permission list, run: %s\n"), "zfs allow|unallow"); } /* * See comments at end of main(). */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } exit(requested ? 0 : 2); } static int parseprop(nvlist_t *props) { char *propname = optarg; char *propval, *strval; if ((propval = strchr(propname, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for -o option\n")); return (-1); } *propval = '\0'; propval++; if (nvlist_lookup_string(props, propname, &strval) == 0) { (void) fprintf(stderr, gettext("property '%s' " "specified multiple times\n"), propname); return (-1); } if (nvlist_add_string(props, propname, propval) != 0) nomem(); return (0); } static int parse_depth(char *opt, int *flags) { char *tmp; int depth; depth = (int)strtol(opt, &tmp, 0); if (*tmp) { (void) fprintf(stderr, gettext("%s is not an integer\n"), optarg); usage(B_FALSE); } if (depth < 0) { (void) fprintf(stderr, gettext("Depth can not be negative.\n")); usage(B_FALSE); } *flags |= (ZFS_ITER_DEPTH_LIMIT|ZFS_ITER_RECURSE); return (depth); } #define PROGRESS_DELAY 2 /* seconds */ static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; static time_t pt_begin; static char *pt_header = NULL; static boolean_t pt_shown; static void start_progress_timer(void) { pt_begin = time(NULL) + PROGRESS_DELAY; pt_shown = B_FALSE; } static void set_progress_header(char *header) { assert(pt_header == NULL); pt_header = safe_strdup(header); if (pt_shown) { (void) printf("%s: ", header); (void) fflush(stdout); } } static void update_progress(char *update) { if (!pt_shown && time(NULL) > pt_begin) { int len = strlen(update); (void) printf("%s: %s%*.*s", pt_header, update, len, len, pt_reverse); (void) fflush(stdout); pt_shown = B_TRUE; } else if (pt_shown) { int len = strlen(update); (void) printf("%s%*.*s", update, len, len, pt_reverse); (void) fflush(stdout); } } static void finish_progress(char *done) { if (pt_shown) { (void) printf("%s\n", done); (void) fflush(stdout); } free(pt_header); pt_header = NULL; } /* * zfs clone [-p] [-o prop=value] ... * * Given an existing dataset, create a writable copy whose initial contents * are the same as the source. The newly created dataset maintains a * dependency on the original; the original cannot be destroyed so long as * the clone exists. * * The '-p' flag creates all the non-existing ancestors of the target first. */ static int zfs_do_clone(int argc, char **argv) { zfs_handle_t *zhp = NULL; boolean_t parents = B_FALSE; nvlist_t *props; int ret = 0; int c; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, "o:p")) != -1) { switch (c) { case 'o': if (parseprop(props)) return (1); break; case 'p': parents = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); goto usage; } if (argc < 2) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); goto usage; } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); goto usage; } /* open the source dataset */ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); if (parents && zfs_name_valid(argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { /* * Now create the ancestors of the target dataset. If the * target already exists and '-p' option was used we should not * complain. */ if (zfs_dataset_exists(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) return (0); if (zfs_create_ancestors(g_zfs, argv[1]) != 0) return (1); } /* pass to libzfs */ ret = zfs_clone(zhp, argv[1], props); /* create the mountpoint if necessary */ if (ret == 0) { zfs_handle_t *clone; clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET); if (clone != NULL) { if (zfs_get_type(clone) != ZFS_TYPE_VOLUME) if ((ret = zfs_mount(clone, NULL, 0)) == 0) ret = zfs_share(clone); zfs_close(clone); } } zfs_close(zhp); nvlist_free(props); return (!!ret); usage: if (zhp) zfs_close(zhp); nvlist_free(props); usage(B_FALSE); return (-1); } /* * zfs create [-p] [-o prop=value] ... fs * zfs create [-ps] [-b blocksize] [-o prop=value] ... -V vol size * * Create a new dataset. This command can be used to create filesystems * and volumes. Snapshot creation is handled by 'zfs snapshot'. * For volumes, the user must specify a size to be used. * * The '-s' flag applies only to volumes, and indicates that we should not try * to set the reservation for this volume. By default we set a reservation * equal to the size for any volume. For pools with SPA_VERSION >= * SPA_VERSION_REFRESERVATION, we set a refreservation instead. * * The '-p' flag creates all the non-existing ancestors of the target first. */ static int zfs_do_create(int argc, char **argv) { zfs_type_t type = ZFS_TYPE_FILESYSTEM; zfs_handle_t *zhp = NULL; uint64_t volsize = 0; int c; boolean_t noreserve = B_FALSE; boolean_t bflag = B_FALSE; boolean_t parents = B_FALSE; int ret = 1; nvlist_t *props; uint64_t intval; int canmount = ZFS_CANMOUNT_OFF; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, ":V:b:so:p")) != -1) { switch (c) { case 'V': type = ZFS_TYPE_VOLUME; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "size '%s': %s\n"), optarg, libzfs_error_description(g_zfs)); goto error; } if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), intval) != 0) nomem(); volsize = intval; break; case 'p': parents = B_TRUE; break; case 'b': bflag = B_TRUE; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "block size '%s': %s\n"), optarg, libzfs_error_description(g_zfs)); goto error; } if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), intval) != 0) nomem(); break; case 'o': if (parseprop(props)) goto error; break; case 's': noreserve = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing size " "argument\n")); goto badusage; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto badusage; } } if ((bflag || noreserve) && type != ZFS_TYPE_VOLUME) { (void) fprintf(stderr, gettext("'-s' and '-b' can only be " "used when creating a volume\n")); goto badusage; } argc -= optind; argv += optind; /* check number of arguments */ if (argc == 0) { (void) fprintf(stderr, gettext("missing %s argument\n"), zfs_type_to_name(type)); goto badusage; } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); goto badusage; } if (type == ZFS_TYPE_VOLUME && !noreserve) { zpool_handle_t *zpool_handle; uint64_t spa_version; char *p; zfs_prop_t resv_prop; char *strval; if ((p = strchr(argv[0], '/'))) *p = '\0'; zpool_handle = zpool_open(g_zfs, argv[0]); if (p != NULL) *p = '/'; if (zpool_handle == NULL) goto error; spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); zpool_close(zpool_handle); if (spa_version >= SPA_VERSION_REFRESERVATION) resv_prop = ZFS_PROP_REFRESERVATION; else resv_prop = ZFS_PROP_RESERVATION; volsize = zvol_volsize_to_reservation(volsize, props); if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop), &strval) != 0) { if (nvlist_add_uint64(props, zfs_prop_to_name(resv_prop), volsize) != 0) { nvlist_free(props); nomem(); } } } if (parents && zfs_name_valid(argv[0], type)) { /* * Now create the ancestors of target dataset. If the target * already exists and '-p' option was used we should not * complain. */ if (zfs_dataset_exists(g_zfs, argv[0], type)) { ret = 0; goto error; } if (zfs_create_ancestors(g_zfs, argv[0]) != 0) goto error; } /* pass to libzfs */ if (zfs_create(g_zfs, argv[0], type, props) != 0) goto error; if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) goto error; ret = 0; /* * if the user doesn't want the dataset automatically mounted, * then skip the mount/share step */ if (zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, type)) canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); /* * Mount and/or share the new filesystem as appropriate. We provide a * verbose error message to let the user know that their filesystem was * in fact created, even if we failed to mount or share it. */ if (canmount == ZFS_CANMOUNT_ON) { if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not mounted\n")); ret = 1; } else if (zfs_share(zhp) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not shared\n")); ret = 1; } } error: if (zhp) zfs_close(zhp); nvlist_free(props); return (ret); badusage: nvlist_free(props); usage(B_FALSE); return (2); } /* * zfs destroy [-rRf] * zfs destroy [-rRd] * * -r Recursively destroy all children * -R Recursively destroy all dependents, including clones * -f Force unmounting of any dependents * -d If we can't destroy now, mark for deferred destruction * * Destroys the given dataset. By default, it will unmount any filesystems, * and refuse to destroy a dataset that has any dependents. A dependent can * either be a child, or a clone of a child. */ typedef struct destroy_cbdata { boolean_t cb_first; - int cb_force; - int cb_recurse; - int cb_error; - int cb_needforce; - int cb_doclones; - boolean_t cb_closezhp; + boolean_t cb_force; + boolean_t cb_recurse; + boolean_t cb_error; + boolean_t cb_doclones; zfs_handle_t *cb_target; - char *cb_snapname; boolean_t cb_defer_destroy; + boolean_t cb_verbose; + boolean_t cb_parsable; + boolean_t cb_dryrun; + nvlist_t *cb_nvl; + + /* first snap in contiguous run */ + zfs_handle_t *cb_firstsnap; + /* previous snap in contiguous run */ + zfs_handle_t *cb_prevsnap; + int64_t cb_snapused; + char *cb_snapspec; } destroy_cbdata_t; /* * Check for any dependents based on the '-r' or '-R' flags. */ static int destroy_check_dependent(zfs_handle_t *zhp, void *data) { destroy_cbdata_t *cbp = data; const char *tname = zfs_get_name(cbp->cb_target); const char *name = zfs_get_name(zhp); if (strncmp(tname, name, strlen(tname)) == 0 && (name[strlen(tname)] == '/' || name[strlen(tname)] == '@')) { /* * This is a direct descendant, not a clone somewhere else in * the hierarchy. */ if (cbp->cb_recurse) goto out; if (cbp->cb_first) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "%s has children\n"), zfs_get_name(cbp->cb_target), zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-r' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; - cbp->cb_error = 1; + cbp->cb_error = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } else { /* * This is a clone. We only want to report this if the '-r' * wasn't specified, or the target is a snapshot. */ if (!cbp->cb_recurse && zfs_get_type(cbp->cb_target) != ZFS_TYPE_SNAPSHOT) goto out; if (cbp->cb_first) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "%s has dependent clones\n"), zfs_get_name(cbp->cb_target), zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-R' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; - cbp->cb_error = 1; + cbp->cb_error = B_TRUE; + cbp->cb_dryrun = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } out: zfs_close(zhp); return (0); } static int destroy_callback(zfs_handle_t *zhp, void *data) { - destroy_cbdata_t *cbp = data; + destroy_cbdata_t *cb = data; + const char *name = zfs_get_name(zhp); + + if (cb->cb_verbose) { + if (cb->cb_parsable) { + (void) printf("destroy\t%s\n", name); + } else if (cb->cb_dryrun) { + (void) printf(gettext("would destroy %s\n"), + name); + } else { + (void) printf(gettext("will destroy %s\n"), + name); + } + } /* * Ignore pools (which we've already flagged as an error before getting * here). */ if (strchr(zfs_get_name(zhp), '/') == NULL && zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { zfs_close(zhp); return (0); } - /* - * Bail out on the first error. - */ - if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 || - zfs_destroy(zhp, cbp->cb_defer_destroy) != 0) { - zfs_close(zhp); - return (-1); + if (!cb->cb_dryrun) { + if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 || + zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { + zfs_close(zhp); + return (-1); + } } zfs_close(zhp); return (0); } static int -destroy_snap_clones(zfs_handle_t *zhp, void *arg) +destroy_print_cb(zfs_handle_t *zhp, void *arg) { - destroy_cbdata_t *cbp = arg; - char thissnap[MAXPATHLEN]; - zfs_handle_t *szhp; - boolean_t closezhp = cbp->cb_closezhp; - int rv; + destroy_cbdata_t *cb = arg; + const char *name = zfs_get_name(zhp); + int err = 0; - (void) snprintf(thissnap, sizeof (thissnap), - "%s@%s", zfs_get_name(zhp), cbp->cb_snapname); + if (nvlist_exists(cb->cb_nvl, name)) { + if (cb->cb_firstsnap == NULL) + cb->cb_firstsnap = zfs_handle_dup(zhp); + if (cb->cb_prevsnap != NULL) + zfs_close(cb->cb_prevsnap); + /* this snap continues the current range */ + cb->cb_prevsnap = zfs_handle_dup(zhp); + if (cb->cb_verbose) { + if (cb->cb_parsable) { + (void) printf("destroy\t%s\n", name); + } else if (cb->cb_dryrun) { + (void) printf(gettext("would destroy %s\n"), + name); + } else { + (void) printf(gettext("will destroy %s\n"), + name); + } + } + } else if (cb->cb_firstsnap != NULL) { + /* end of this range */ + uint64_t used = 0; + err = zfs_get_snapused_int(cb->cb_firstsnap, + cb->cb_prevsnap, &used); + cb->cb_snapused += used; + zfs_close(cb->cb_firstsnap); + cb->cb_firstsnap = NULL; + zfs_close(cb->cb_prevsnap); + cb->cb_prevsnap = NULL; + } + zfs_close(zhp); + return (err); +} - libzfs_print_on_error(g_zfs, B_FALSE); - szhp = zfs_open(g_zfs, thissnap, ZFS_TYPE_SNAPSHOT); - libzfs_print_on_error(g_zfs, B_TRUE); - if (szhp) { - /* - * Destroy any clones of this snapshot - */ - if (zfs_iter_dependents(szhp, B_FALSE, destroy_callback, - cbp) != 0) { - zfs_close(szhp); - if (closezhp) - zfs_close(zhp); - return (-1); +static int +destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) +{ + int err; + assert(cb->cb_firstsnap == NULL); + assert(cb->cb_prevsnap == NULL); + err = zfs_iter_snapshots_sorted(fs_zhp, destroy_print_cb, cb); + if (cb->cb_firstsnap != NULL) { + uint64_t used = 0; + if (err == 0) { + err = zfs_get_snapused_int(cb->cb_firstsnap, + cb->cb_prevsnap, &used); } - zfs_close(szhp); + cb->cb_snapused += used; + zfs_close(cb->cb_firstsnap); + cb->cb_firstsnap = NULL; + zfs_close(cb->cb_prevsnap); + cb->cb_prevsnap = NULL; } + return (err); +} - cbp->cb_closezhp = B_TRUE; - rv = zfs_iter_filesystems(zhp, destroy_snap_clones, arg); - if (closezhp) - zfs_close(zhp); - return (rv); +static int +snapshot_to_nvl_cb(zfs_handle_t *zhp, void *arg) +{ + destroy_cbdata_t *cb = arg; + int err = 0; + + /* Check for clones. */ + if (!cb->cb_doclones) { + cb->cb_target = zhp; + cb->cb_first = B_TRUE; + err = zfs_iter_dependents(zhp, B_TRUE, + destroy_check_dependent, cb); + } + + if (err == 0) { + if (nvlist_add_boolean(cb->cb_nvl, zfs_get_name(zhp))) + nomem(); + } + zfs_close(zhp); + return (err); +} + +static int +gather_snapshots(zfs_handle_t *zhp, void *arg) +{ + destroy_cbdata_t *cb = arg; + int err = 0; + + err = zfs_iter_snapspec(zhp, cb->cb_snapspec, snapshot_to_nvl_cb, cb); + if (err == ENOENT) + err = 0; + if (err != 0) + goto out; + + if (cb->cb_verbose) { + err = destroy_print_snapshots(zhp, cb); + if (err != 0) + goto out; + } + + if (cb->cb_recurse) + err = zfs_iter_filesystems(zhp, gather_snapshots, cb); + +out: + zfs_close(zhp); + return (err); +} + +static int +destroy_clones(destroy_cbdata_t *cb) +{ + nvpair_t *pair; + for (pair = nvlist_next_nvpair(cb->cb_nvl, NULL); + pair != NULL; + pair = nvlist_next_nvpair(cb->cb_nvl, pair)) { + zfs_handle_t *zhp = zfs_open(g_zfs, nvpair_name(pair), + ZFS_TYPE_SNAPSHOT); + if (zhp != NULL) { + boolean_t defer = cb->cb_defer_destroy; + int err; + + /* + * We can't defer destroy non-snapshots, so set it to + * false while destroying the clones. + */ + cb->cb_defer_destroy = B_FALSE; + err = zfs_iter_dependents(zhp, B_FALSE, + destroy_callback, cb); + cb->cb_defer_destroy = defer; + zfs_close(zhp); + if (err != 0) + return (err); + } + } + return (0); } static int zfs_do_destroy(int argc, char **argv) { destroy_cbdata_t cb = { 0 }; int c; zfs_handle_t *zhp; - char *cp; + char *at; zfs_type_t type = ZFS_TYPE_DATASET; /* check options */ - while ((c = getopt(argc, argv, "dfrR")) != -1) { + while ((c = getopt(argc, argv, "vpndfrR")) != -1) { switch (c) { + case 'v': + cb.cb_verbose = B_TRUE; + break; + case 'p': + cb.cb_verbose = B_TRUE; + cb.cb_parsable = B_TRUE; + break; + case 'n': + cb.cb_dryrun = B_TRUE; + break; case 'd': cb.cb_defer_destroy = B_TRUE; type = ZFS_TYPE_SNAPSHOT; break; case 'f': - cb.cb_force = 1; + cb.cb_force = B_TRUE; break; case 'r': - cb.cb_recurse = 1; + cb.cb_recurse = B_TRUE; break; case 'R': - cb.cb_recurse = 1; - cb.cb_doclones = 1; + cb.cb_recurse = B_TRUE; + cb.cb_doclones = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc == 0) { - (void) fprintf(stderr, gettext("missing path argument\n")); + (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } - /* - * If we are doing recursive destroy of a snapshot, then the - * named snapshot may not exist. Go straight to libzfs. - */ - if (cb.cb_recurse && (cp = strchr(argv[0], '@'))) { - int ret = 0; + at = strchr(argv[0], '@'); + if (at != NULL) { + int err = 0; + + /* Build the list of snaps to destroy in cb_nvl. */ + if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); - *cp = '\0'; - if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) + *at = '\0'; + zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) return (1); - *cp = '@'; - cp++; - if (cb.cb_doclones) { - boolean_t defer = cb.cb_defer_destroy; + cb.cb_snapspec = at + 1; + if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || + cb.cb_error) { + zfs_close(zhp); + nvlist_free(cb.cb_nvl); + return (1); + } - /* - * Temporarily ignore the defer_destroy setting since - * it's not supported for clones. - */ - cb.cb_defer_destroy = B_FALSE; - cb.cb_snapname = cp; - if (destroy_snap_clones(zhp, &cb) != 0) { - zfs_close(zhp); - return (1); + if (nvlist_empty(cb.cb_nvl)) { + (void) fprintf(stderr, gettext("could not find any " + "snapshots to destroy; check snapshot names.\n")); + zfs_close(zhp); + nvlist_free(cb.cb_nvl); + return (1); + } + + if (cb.cb_verbose) { + char buf[16]; + zfs_nicenum(cb.cb_snapused, buf, sizeof (buf)); + if (cb.cb_parsable) { + (void) printf("reclaim\t%llu\n", + (u_longlong_t)cb.cb_snapused); + } else if (cb.cb_dryrun) { + (void) printf(gettext("would reclaim %s\n"), + buf); + } else { + (void) printf(gettext("will reclaim %s\n"), + buf); } - cb.cb_defer_destroy = defer; } - ret = zfs_destroy_snaps(zhp, cp, cb.cb_defer_destroy); - zfs_close(zhp); - if (ret) { - (void) fprintf(stderr, - gettext("no snapshots destroyed\n")); + if (!cb.cb_dryrun) { + if (cb.cb_doclones) + err = destroy_clones(&cb); + if (err == 0) { + err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl, + cb.cb_defer_destroy); + } } - return (ret != 0); - } - /* Open the given dataset */ - if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) - return (1); + zfs_close(zhp); + nvlist_free(cb.cb_nvl); + if (err != 0) + return (1); + } else { + /* Open the given dataset */ + if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) + return (1); - cb.cb_target = zhp; + cb.cb_target = zhp; - /* - * Perform an explicit check for pools before going any further. - */ - if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && - zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { - (void) fprintf(stderr, gettext("cannot destroy '%s': " - "operation does not apply to pools\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use 'zfs destroy -r " - "%s' to destroy all datasets in the pool\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use 'zpool destroy %s' " - "to destroy the pool itself\n"), zfs_get_name(zhp)); - zfs_close(zhp); - return (1); - } + /* + * Perform an explicit check for pools before going any further. + */ + if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && + zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { + (void) fprintf(stderr, gettext("cannot destroy '%s': " + "operation does not apply to pools\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use 'zfs destroy -r " + "%s' to destroy all datasets in the pool\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use 'zpool destroy %s' " + "to destroy the pool itself\n"), zfs_get_name(zhp)); + zfs_close(zhp); + return (1); + } - /* - * Check for any dependents and/or clones. - */ - cb.cb_first = B_TRUE; - if (!cb.cb_doclones && !cb.cb_defer_destroy && - zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, - &cb) != 0) { - zfs_close(zhp); - return (1); - } + /* + * Check for any dependents and/or clones. + */ + cb.cb_first = B_TRUE; + if (!cb.cb_doclones && + zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, + &cb) != 0) { + zfs_close(zhp); + return (1); + } - if (cb.cb_error || (!cb.cb_defer_destroy && - (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0))) { - zfs_close(zhp); - return (1); - } + if (cb.cb_error) { + zfs_close(zhp); + return (1); + } - /* - * Do the real thing. The callback will close the handle regardless of - * whether it succeeds or not. - */ + if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, + &cb) != 0) { + zfs_close(zhp); + return (1); + } - if (destroy_callback(zhp, &cb) != 0) - return (1); + /* + * Do the real thing. The callback will close the + * handle regardless of whether it succeeds or not. + */ + if (destroy_callback(zhp, &cb) != 0) + return (1); + } return (0); } static boolean_t is_recvd_column(zprop_get_cbdata_t *cbp) { int i; zfs_get_column_t col; for (i = 0; i < ZFS_GET_NCOLS && (col = cbp->cb_columns[i]) != GET_COL_NONE; i++) if (col == GET_COL_RECVD) return (B_TRUE); return (B_FALSE); } /* * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...] * < all | property[,property]... > < fs | snap | vol > ... * * -r recurse over any child datasets * -H scripted mode. Headers are stripped, and fields are separated * by tabs instead of spaces. * -o Set of fields to display. One of "name,property,value, * received,source". Default is "name,property,value,source". * "all" is an alias for all five. * -s Set of sources to allow. One of * "local,default,inherited,received,temporary,none". Default is * all six. * -p Display values in parsable (literal) format. * * Prints properties for the given datasets. The user can control which * columns to display as well as which property types to allow. */ /* * Invoked to display the properties for a single dataset. */ static int get_callback(zfs_handle_t *zhp, void *data) { char buf[ZFS_MAXPROPLEN]; char rbuf[ZFS_MAXPROPLEN]; zprop_source_t sourcetype; char source[ZFS_MAXNAMELEN]; zprop_get_cbdata_t *cbp = data; nvlist_t *user_props = zfs_get_user_props(zhp); zprop_list_t *pl = cbp->cb_proplist; nvlist_t *propval; char *strval; char *sourceval; boolean_t received = is_recvd_column(cbp); for (; pl != NULL; pl = pl->pl_next) { char *recvdval = NULL; /* * Skip the special fake placeholder. This will also skip over * the name property when 'all' is specified. */ if (pl->pl_prop == ZFS_PROP_NAME && pl == cbp->cb_proplist) continue; if (pl->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, pl->pl_prop, buf, sizeof (buf), &sourcetype, source, sizeof (source), cbp->cb_literal) != 0) { if (pl->pl_all) continue; if (!zfs_prop_valid_for_type(pl->pl_prop, ZFS_TYPE_DATASET)) { (void) fprintf(stderr, gettext("No such property '%s'\n"), zfs_prop_to_name(pl->pl_prop)); continue; } sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } if (received && (zfs_prop_get_recvd(zhp, zfs_prop_to_name(pl->pl_prop), rbuf, sizeof (rbuf), cbp->cb_literal) == 0)) recvdval = rbuf; zprop_print_one_property(zfs_get_name(zhp), cbp, zfs_prop_to_name(pl->pl_prop), buf, sourcetype, source, recvdval); } else if (zfs_prop_userquota(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, buf, sizeof (buf), cbp->cb_literal) != 0) { sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } + zprop_print_one_property(zfs_get_name(zhp), cbp, + pl->pl_user_prop, buf, sourcetype, source, NULL); + } else if (zfs_prop_written(pl->pl_user_prop)) { + sourcetype = ZPROP_SRC_LOCAL; + + if (zfs_prop_get_written(zhp, pl->pl_user_prop, + buf, sizeof (buf), cbp->cb_literal) != 0) { + sourcetype = ZPROP_SRC_NONE; + (void) strlcpy(buf, "-", sizeof (buf)); + } + zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); } else { if (nvlist_lookup_nvlist(user_props, pl->pl_user_prop, &propval) != 0) { if (pl->pl_all) continue; sourcetype = ZPROP_SRC_NONE; strval = "-"; } else { verify(nvlist_lookup_string(propval, ZPROP_VALUE, &strval) == 0); verify(nvlist_lookup_string(propval, ZPROP_SOURCE, &sourceval) == 0); if (strcmp(sourceval, zfs_get_name(zhp)) == 0) { sourcetype = ZPROP_SRC_LOCAL; } else if (strcmp(sourceval, ZPROP_SOURCE_VAL_RECVD) == 0) { sourcetype = ZPROP_SRC_RECEIVED; } else { sourcetype = ZPROP_SRC_INHERITED; (void) strlcpy(source, sourceval, sizeof (source)); } } if (received && (zfs_prop_get_recvd(zhp, pl->pl_user_prop, rbuf, sizeof (rbuf), cbp->cb_literal) == 0)) recvdval = rbuf; zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, strval, sourcetype, source, recvdval); } } return (0); } static int zfs_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; int i, c, flags = ZFS_ITER_ARGS_CAN_BE_PATHS; char *value, *fields; int ret = 0; int limit = 0; zprop_list_t fake_name = { 0 }; /* * Set up default columns and sources. */ cb.cb_sources = ZPROP_SRC_ALL; cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_DATASET; /* check options */ while ((c = getopt(argc, argv, ":d:o:s:rHp")) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; break; case 'd': limit = parse_depth(optarg, &flags); break; case 'r': flags |= ZFS_ITER_RECURSE; break; case 'H': cb.cb_scripted = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case 'o': /* * Process the set of columns to display. We zero out * the structure to give us a blank slate. */ bzero(&cb.cb_columns, sizeof (cb.cb_columns)); i = 0; while (*optarg != '\0') { static char *col_subopts[] = { "name", "property", "value", "received", "source", "all", NULL }; if (i == ZFS_GET_NCOLS) { (void) fprintf(stderr, gettext("too " "many fields given to -o " "option\n")); usage(B_FALSE); } switch (getsubopt(&optarg, col_subopts, &value)) { case 0: cb.cb_columns[i++] = GET_COL_NAME; break; case 1: cb.cb_columns[i++] = GET_COL_PROPERTY; break; case 2: cb.cb_columns[i++] = GET_COL_VALUE; break; case 3: cb.cb_columns[i++] = GET_COL_RECVD; flags |= ZFS_ITER_RECVD_PROPS; break; case 4: cb.cb_columns[i++] = GET_COL_SOURCE; break; case 5: if (i > 0) { (void) fprintf(stderr, gettext("\"all\" conflicts " "with specific fields " "given to -o option\n")); usage(B_FALSE); } cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_RECVD; cb.cb_columns[4] = GET_COL_SOURCE; flags |= ZFS_ITER_RECVD_PROPS; i = ZFS_GET_NCOLS; break; default: (void) fprintf(stderr, gettext("invalid column name " "'%s'\n"), value); usage(B_FALSE); } } break; case 's': cb.cb_sources = 0; while (*optarg != '\0') { static char *source_subopts[] = { "local", "default", "inherited", "received", "temporary", "none", NULL }; switch (getsubopt(&optarg, source_subopts, &value)) { case 0: cb.cb_sources |= ZPROP_SRC_LOCAL; break; case 1: cb.cb_sources |= ZPROP_SRC_DEFAULT; break; case 2: cb.cb_sources |= ZPROP_SRC_INHERITED; break; case 3: cb.cb_sources |= ZPROP_SRC_RECEIVED; break; case 4: cb.cb_sources |= ZPROP_SRC_TEMPORARY; break; case 5: cb.cb_sources |= ZPROP_SRC_NONE; break; default: (void) fprintf(stderr, gettext("invalid source " "'%s'\n"), value); usage(B_FALSE); } } break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing property " "argument\n")); usage(B_FALSE); } fields = argv[0]; if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) != 0) usage(B_FALSE); argc--; argv++; /* * As part of zfs_expand_proplist(), we keep track of the maximum column * width for each property. For the 'NAME' (and 'SOURCE') columns, we * need to know the maximum name length. However, the user likely did * not specify 'name' as one of the properties to fetch, so we need to * make sure we always include at least this property for * print_get_headers() to work properly. */ if (cb.cb_proplist != NULL) { fake_name.pl_prop = ZFS_PROP_NAME; fake_name.pl_width = strlen(gettext("NAME")); fake_name.pl_next = cb.cb_proplist; cb.cb_proplist = &fake_name; } cb.cb_first = B_TRUE; /* run for each object */ ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, &cb.cb_proplist, limit, get_callback, &cb); if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); else zprop_free_list(cb.cb_proplist); return (ret); } /* * inherit [-rS] ... * * -r Recurse over all children * -S Revert to received value, if any * * For each dataset specified on the command line, inherit the given property * from its parent. Inheriting a property at the pool level will cause it to * use the default value. The '-r' flag will recurse over all children, and is * useful for setting a property on a hierarchy-wide basis, regardless of any * local modifications for each dataset. */ typedef struct inherit_cbdata { const char *cb_propname; boolean_t cb_received; } inherit_cbdata_t; static int inherit_recurse_cb(zfs_handle_t *zhp, void *data) { inherit_cbdata_t *cb = data; zfs_prop_t prop = zfs_name_to_prop(cb->cb_propname); /* * If we're doing it recursively, then ignore properties that * are not valid for this type of dataset. */ if (prop != ZPROP_INVAL && !zfs_prop_valid_for_type(prop, zfs_get_type(zhp))) return (0); return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); } static int inherit_cb(zfs_handle_t *zhp, void *data) { inherit_cbdata_t *cb = data; return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); } static int zfs_do_inherit(int argc, char **argv) { int c; zfs_prop_t prop; inherit_cbdata_t cb = { 0 }; char *propname; int ret = 0; int flags = 0; boolean_t received = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "rS")) != -1) { switch (c) { case 'r': flags |= ZFS_ITER_RECURSE; break; case 'S': received = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing property argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } propname = argv[0]; argc--; argv++; if ((prop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { if (zfs_prop_readonly(prop)) { (void) fprintf(stderr, gettext( "%s property is read-only\n"), propname); return (1); } if (!zfs_prop_inheritable(prop) && !received) { (void) fprintf(stderr, gettext("'%s' property cannot " "be inherited\n"), propname); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) (void) fprintf(stderr, gettext("use 'zfs set " "%s=none' to clear\n"), propname); return (1); } if (received && (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION)) { (void) fprintf(stderr, gettext("'%s' property cannot " "be reverted to a received value\n"), propname); return (1); } } else if (!zfs_prop_user(propname)) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), propname); usage(B_FALSE); } cb.cb_propname = propname; cb.cb_received = received; if (flags & ZFS_ITER_RECURSE) { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, NULL, 0, inherit_recurse_cb, &cb); } else { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, NULL, 0, inherit_cb, &cb); } return (ret); } typedef struct upgrade_cbdata { uint64_t cb_numupgraded; uint64_t cb_numsamegraded; uint64_t cb_numfailed; uint64_t cb_version; boolean_t cb_newer; boolean_t cb_foundone; char cb_lastfs[ZFS_MAXNAMELEN]; } upgrade_cbdata_t; static int same_pool(zfs_handle_t *zhp, const char *name) { int len1 = strcspn(name, "/@"); const char *zhname = zfs_get_name(zhp); int len2 = strcspn(zhname, "/@"); if (len1 != len2) return (B_FALSE); return (strncmp(name, zhname, len1) == 0); } static int upgrade_list_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); /* list if it's old/new */ if ((!cb->cb_newer && version < ZPL_VERSION) || (cb->cb_newer && version > ZPL_VERSION)) { char *str; if (cb->cb_newer) { str = gettext("The following filesystems are " "formatted using a newer software version and\n" "cannot be accessed on the current system.\n\n"); } else { str = gettext("The following filesystems are " "out of date, and can be upgraded. After being\n" "upgraded, these filesystems (and any 'zfs send' " "streams generated from\n" "subsequent snapshots) will no longer be " "accessible by older software versions.\n\n"); } if (!cb->cb_foundone) { (void) puts(str); (void) printf(gettext("VER FILESYSTEM\n")); (void) printf(gettext("--- ------------\n")); cb->cb_foundone = B_TRUE; } (void) printf("%2u %s\n", version, zfs_get_name(zhp)); } return (0); } static int upgrade_set_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); int needed_spa_version; int spa_version; if (zfs_spa_version(zhp, &spa_version) < 0) return (-1); needed_spa_version = zfs_spa_version_map(cb->cb_version); if (needed_spa_version < 0) return (-1); if (spa_version < needed_spa_version) { /* can't upgrade */ (void) printf(gettext("%s: can not be " "upgraded; the pool version needs to first " "be upgraded\nto version %d\n\n"), zfs_get_name(zhp), needed_spa_version); cb->cb_numfailed++; return (0); } /* upgrade */ if (version < cb->cb_version) { char verstr[16]; (void) snprintf(verstr, sizeof (verstr), "%llu", (u_longlong_t)cb->cb_version); if (cb->cb_lastfs[0] && !same_pool(zhp, cb->cb_lastfs)) { /* * If they did "zfs upgrade -a", then we could * be doing ioctls to different pools. We need * to log this history once to each pool. */ verify(zpool_stage_history(g_zfs, history_str) == 0); } if (zfs_prop_set(zhp, "version", verstr) == 0) cb->cb_numupgraded++; else cb->cb_numfailed++; (void) strcpy(cb->cb_lastfs, zfs_get_name(zhp)); } else if (version > cb->cb_version) { /* can't downgrade */ (void) printf(gettext("%s: can not be downgraded; " "it is already at version %u\n"), zfs_get_name(zhp), version); cb->cb_numfailed++; } else { cb->cb_numsamegraded++; } return (0); } /* * zfs upgrade * zfs upgrade -v * zfs upgrade [-r] [-V ] <-a | filesystem> */ static int zfs_do_upgrade(int argc, char **argv) { boolean_t all = B_FALSE; boolean_t showversions = B_FALSE; int ret = 0; upgrade_cbdata_t cb = { 0 }; signed char c; int flags = ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ while ((c = getopt(argc, argv, "rvV:a")) != -1) { switch (c) { case 'r': flags |= ZFS_ITER_RECURSE; break; case 'v': showversions = B_TRUE; break; case 'V': if (zfs_prop_string_to_index(ZFS_PROP_VERSION, optarg, &cb.cb_version) != 0) { (void) fprintf(stderr, gettext("invalid version %s\n"), optarg); usage(B_FALSE); } break; case 'a': all = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if ((!all && !argc) && ((flags & ZFS_ITER_RECURSE) | cb.cb_version)) usage(B_FALSE); if (showversions && (flags & ZFS_ITER_RECURSE || all || cb.cb_version || argc)) usage(B_FALSE); if ((all || argc) && (showversions)) usage(B_FALSE); if (all && argc) usage(B_FALSE); if (showversions) { /* Show info on available versions. */ (void) printf(gettext("The following filesystem versions are " "supported:\n\n")); (void) printf(gettext("VER DESCRIPTION\n")); (void) printf("--- -----------------------------------------" "---------------\n"); (void) printf(gettext(" 1 Initial ZFS filesystem version\n")); (void) printf(gettext(" 2 Enhanced directory entries\n")); - (void) printf(gettext(" 3 Case insensitive and File system " - "unique identifier (FUID)\n")); + (void) printf(gettext(" 3 Case insensitive and filesystem " + "user identifier (FUID)\n")); (void) printf(gettext(" 4 userquota, groupquota " "properties\n")); (void) printf(gettext(" 5 System attributes\n")); (void) printf(gettext("\nFor more information on a particular " "version, including supported releases,\n")); (void) printf("see the ZFS Administration Guide.\n\n"); ret = 0; } else if (argc || all) { /* Upgrade filesystems */ if (cb.cb_version == 0) cb.cb_version = ZPL_VERSION; ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_set_callback, &cb); (void) printf(gettext("%llu filesystems upgraded\n"), (u_longlong_t)cb.cb_numupgraded); if (cb.cb_numsamegraded) { (void) printf(gettext("%llu filesystems already at " "this version\n"), (u_longlong_t)cb.cb_numsamegraded); } if (cb.cb_numfailed != 0) ret = 1; } else { /* List old-version filesytems */ boolean_t found; (void) printf(gettext("This system is currently running " "ZFS filesystem version %llu.\n\n"), ZPL_VERSION); flags |= ZFS_ITER_RECURSE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_list_callback, &cb); found = cb.cb_foundone; cb.cb_foundone = B_FALSE; cb.cb_newer = B_TRUE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_list_callback, &cb); if (!cb.cb_foundone && !found) { (void) printf(gettext("All filesystems are " "formatted with the current version.\n")); } } return (ret); } #define USTYPE_USR_BIT (0) #define USTYPE_GRP_BIT (1) #define USTYPE_PSX_BIT (2) #define USTYPE_SMB_BIT (3) #define USTYPE_USR (1 << USTYPE_USR_BIT) #define USTYPE_GRP (1 << USTYPE_GRP_BIT) #define USTYPE_PSX (1 << USTYPE_PSX_BIT) #define USTYPE_SMB (1 << USTYPE_SMB_BIT) #define USTYPE_PSX_USR (USTYPE_PSX | USTYPE_USR) #define USTYPE_SMB_USR (USTYPE_SMB | USTYPE_USR) #define USTYPE_PSX_GRP (USTYPE_PSX | USTYPE_GRP) #define USTYPE_SMB_GRP (USTYPE_SMB | USTYPE_GRP) #define USTYPE_ALL (USTYPE_PSX_USR | USTYPE_SMB_USR \ | USTYPE_PSX_GRP | USTYPE_SMB_GRP) #define USPROP_USED_BIT (0) #define USPROP_QUOTA_BIT (1) #define USPROP_USED (1 << USPROP_USED_BIT) #define USPROP_QUOTA (1 << USPROP_QUOTA_BIT) typedef struct us_node { nvlist_t *usn_nvl; uu_avl_node_t usn_avlnode; uu_list_node_t usn_listnode; } us_node_t; typedef struct us_cbdata { nvlist_t **cb_nvlp; uu_avl_pool_t *cb_avl_pool; uu_avl_t *cb_avl; boolean_t cb_numname; boolean_t cb_nicenum; boolean_t cb_sid2posix; zfs_userquota_prop_t cb_prop; zfs_sort_column_t *cb_sortcol; size_t cb_max_typelen; size_t cb_max_namelen; size_t cb_max_usedlen; size_t cb_max_quotalen; } us_cbdata_t; typedef struct { zfs_sort_column_t *si_sortcol; boolean_t si_num_name; boolean_t si_parsable; } us_sort_info_t; static int us_compare(const void *larg, const void *rarg, void *unused) { const us_node_t *l = larg; const us_node_t *r = rarg; int rc = 0; us_sort_info_t *si = (us_sort_info_t *)unused; zfs_sort_column_t *sortcol = si->si_sortcol; boolean_t num_name = si->si_num_name; nvlist_t *lnvl = l->usn_nvl; nvlist_t *rnvl = r->usn_nvl; for (; sortcol != NULL; sortcol = sortcol->sc_next) { char *lvstr = ""; char *rvstr = ""; uint32_t lv32 = 0; uint32_t rv32 = 0; uint64_t lv64 = 0; uint64_t rv64 = 0; zfs_prop_t prop = sortcol->sc_prop; const char *propname = NULL; boolean_t reverse = sortcol->sc_reverse; switch (prop) { case ZFS_PROP_TYPE: propname = "type"; (void) nvlist_lookup_uint32(lnvl, propname, &lv32); (void) nvlist_lookup_uint32(rnvl, propname, &rv32); if (rv32 != lv32) rc = (rv32 > lv32) ? 1 : -1; break; case ZFS_PROP_NAME: propname = "name"; if (num_name) { (void) nvlist_lookup_uint32(lnvl, propname, &lv32); (void) nvlist_lookup_uint32(rnvl, propname, &rv32); if (rv32 != lv32) rc = (rv32 > lv32) ? 1 : -1; } else { (void) nvlist_lookup_string(lnvl, propname, &lvstr); (void) nvlist_lookup_string(rnvl, propname, &rvstr); rc = strcmp(lvstr, rvstr); } break; case ZFS_PROP_USED: case ZFS_PROP_QUOTA: if (ZFS_PROP_USED == prop) propname = "used"; else propname = "quota"; (void) nvlist_lookup_uint64(lnvl, propname, &lv64); (void) nvlist_lookup_uint64(rnvl, propname, &rv64); if (rv64 != lv64) rc = (rv64 > lv64) ? 1 : -1; default: break; } if (rc) { if (rc < 0) return (reverse ? 1 : -1); else return (reverse ? -1 : 1); } } return (rc); } static inline const char * us_type2str(unsigned field_type) { switch (field_type) { case USTYPE_PSX_USR: return ("POSIX User"); case USTYPE_PSX_GRP: return ("POSIX Group"); case USTYPE_SMB_USR: return ("SMB User"); case USTYPE_SMB_GRP: return ("SMB Group"); default: return ("Undefined"); } } /* * zfs userspace */ static int userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) { us_cbdata_t *cb = (us_cbdata_t *)arg; zfs_userquota_prop_t prop = cb->cb_prop; char *name = NULL; char *propname; char namebuf[32]; char sizebuf[32]; us_node_t *node; uu_avl_pool_t *avl_pool = cb->cb_avl_pool; uu_avl_t *avl = cb->cb_avl; uu_avl_index_t idx; nvlist_t *props; us_node_t *n; zfs_sort_column_t *sortcol = cb->cb_sortcol; unsigned type; const char *typestr; size_t namelen; size_t typelen; size_t sizelen; us_sort_info_t sortinfo = { sortcol, cb->cb_numname }; if (domain == NULL || domain[0] == '\0') { /* POSIX */ if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { type = USTYPE_PSX_GRP; struct group *g = getgrgid(rid); if (g) name = g->gr_name; } else { type = USTYPE_PSX_USR; struct passwd *p = getpwuid(rid); if (p) name = p->pw_name; } } else { #ifdef HAVE_IDMAP char sid[ZFS_MAXNAMELEN+32]; uid_t id; uint64_t classes; int err = 0; directory_error_t e; (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid); /* SMB */ if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { type = USTYPE_SMB_GRP; err = sid_to_id(sid, B_FALSE, &id); } else { type = USTYPE_SMB_USR; err = sid_to_id(sid, B_TRUE, &id); } if (err == 0) { rid = id; e = directory_name_from_sid(NULL, sid, &name, &classes); if (e != NULL) { directory_error_free(e); return (NULL); } if (name == NULL) name = sid; } #else return (-1); #endif /* HAVE_IDMAP */ } /* * if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) * ug = "group"; * else * ug = "user"; */ if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED) propname = "used"; else propname = "quota"; (void) snprintf(namebuf, sizeof (namebuf), "%u", rid); if (name == NULL) name = namebuf; if (cb->cb_nicenum) zfs_nicenum(space, sizebuf, sizeof (sizebuf)); else (void) sprintf(sizebuf, "%llu", (u_longlong_t)space); node = safe_malloc(sizeof (us_node_t)); uu_avl_node_init(node, &node->usn_avlnode, avl_pool); if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { free(node); return (-1); } if (nvlist_add_uint32(props, "type", type) != 0) nomem(); if (cb->cb_numname) { if (nvlist_add_uint32(props, "name", rid) != 0) nomem(); namelen = strlen(namebuf); } else { if (nvlist_add_string(props, "name", name) != 0) nomem(); namelen = strlen(name); } typestr = us_type2str(type); typelen = strlen(gettext(typestr)); if (typelen > cb->cb_max_typelen) cb->cb_max_typelen = typelen; if (namelen > cb->cb_max_namelen) cb->cb_max_namelen = namelen; sizelen = strlen(sizebuf); if (0 == strcmp(propname, "used")) { if (sizelen > cb->cb_max_usedlen) cb->cb_max_usedlen = sizelen; } else { if (sizelen > cb->cb_max_quotalen) cb->cb_max_quotalen = sizelen; } node->usn_nvl = props; n = uu_avl_find(avl, node, &sortinfo, &idx); if (n == NULL) uu_avl_insert(avl, node, idx); else { nvlist_free(props); free(node); node = n; props = node->usn_nvl; } if (nvlist_add_uint64(props, propname, space) != 0) nomem(); return (0); } static inline boolean_t usprop_check(zfs_userquota_prop_t p, unsigned types, unsigned props) { unsigned type; unsigned prop; switch (p) { case ZFS_PROP_USERUSED: type = USTYPE_USR; prop = USPROP_USED; break; case ZFS_PROP_USERQUOTA: type = USTYPE_USR; prop = USPROP_QUOTA; break; case ZFS_PROP_GROUPUSED: type = USTYPE_GRP; prop = USPROP_USED; break; case ZFS_PROP_GROUPQUOTA: type = USTYPE_GRP; prop = USPROP_QUOTA; break; default: /* ALL */ return (B_TRUE); }; return (type & types && prop & props); } #define USFIELD_TYPE (1 << 0) #define USFIELD_NAME (1 << 1) #define USFIELD_USED (1 << 2) #define USFIELD_QUOTA (1 << 3) #define USFIELD_ALL (USFIELD_TYPE | USFIELD_NAME | USFIELD_USED | USFIELD_QUOTA) static int parsefields(unsigned *fieldsp, char **names, unsigned *bits, size_t len) { char *field = optarg; char *delim; do { int i; boolean_t found = B_FALSE; delim = strchr(field, ','); if (delim != NULL) *delim = '\0'; for (i = 0; i < len; i++) if (0 == strcmp(field, names[i])) { found = B_TRUE; *fieldsp |= bits[i]; break; } if (!found) { (void) fprintf(stderr, gettext("invalid type '%s'" "for -t option\n"), field); return (-1); } field = delim + 1; } while (delim); return (0); } static char *type_names[] = { "posixuser", "smbuser", "posixgroup", "smbgroup", "all" }; static unsigned type_bits[] = { USTYPE_PSX_USR, USTYPE_SMB_USR, USTYPE_PSX_GRP, USTYPE_SMB_GRP, USTYPE_ALL }; static char *us_field_names[] = { "type", "name", "used", "quota" }; static unsigned us_field_bits[] = { USFIELD_TYPE, USFIELD_NAME, USFIELD_USED, USFIELD_QUOTA }; static void print_us_node(boolean_t scripted, boolean_t parseable, unsigned fields, size_t type_width, size_t name_width, size_t used_width, size_t quota_width, us_node_t *node) { nvlist_t *nvl = node->usn_nvl; nvpair_t *nvp = NULL; char valstr[ZFS_MAXNAMELEN]; boolean_t first = B_TRUE; boolean_t quota_found = B_FALSE; if (fields & USFIELD_QUOTA && !nvlist_exists(nvl, "quota")) if (nvlist_add_string(nvl, "quota", "none") != 0) nomem(); while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { char *pname = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); uint32_t val32 = 0; uint64_t val64 = 0; char *strval = NULL; unsigned field = 0; unsigned width = 0; int i; for (i = 0; i < 4; i++) { if (0 == strcmp(pname, us_field_names[i])) { field = us_field_bits[i]; break; } } if (!(field & fields)) continue; switch (type) { case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &val32); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &val64); break; case DATA_TYPE_STRING: (void) nvpair_value_string(nvp, &strval); break; default: (void) fprintf(stderr, "Invalid data type\n"); } if (!first) { if (scripted) (void) printf("\t"); else (void) printf(" "); } switch (field) { case USFIELD_TYPE: strval = (char *)us_type2str(val32); width = type_width; break; case USFIELD_NAME: if (type == DATA_TYPE_UINT64) { (void) sprintf(valstr, "%llu", (u_longlong_t) val64); strval = valstr; } width = name_width; break; case USFIELD_USED: case USFIELD_QUOTA: if (type == DATA_TYPE_UINT64) { (void) nvpair_value_uint64(nvp, &val64); if (parseable) (void) sprintf(valstr, "%llu", (u_longlong_t) val64); else zfs_nicenum(val64, valstr, sizeof (valstr)); strval = valstr; } if (field == USFIELD_USED) width = used_width; else { quota_found = B_FALSE; width = quota_width; } break; } if (field == USFIELD_QUOTA && !quota_found) (void) printf("%*s", width, strval); else { if (type == DATA_TYPE_STRING) (void) printf("%-*s", width, strval); else (void) printf("%*s", width, strval); } first = B_FALSE; } (void) printf("\n"); } static void print_us(boolean_t scripted, boolean_t parsable, unsigned fields, unsigned type_width, unsigned name_width, unsigned used_width, unsigned quota_width, boolean_t rmnode, uu_avl_t *avl) { static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" }; us_node_t *node; const char *col; int i; int width[4] = { type_width, name_width, used_width, quota_width }; if (!scripted) { boolean_t first = B_TRUE; for (i = 0; i < 4; i++) { unsigned field = us_field_bits[i]; if (!(field & fields)) continue; col = gettext(us_field_hdr[i]); if (field == USFIELD_TYPE || field == USFIELD_NAME) (void) printf(first?"%-*s":" %-*s", width[i], col); else (void) printf(first?"%*s":" %*s", width[i], col); first = B_FALSE; } (void) printf("\n"); } for (node = uu_avl_first(avl); node != NULL; node = uu_avl_next(avl, node)) { print_us_node(scripted, parsable, fields, type_width, name_width, used_width, used_width, node); if (rmnode) nvlist_free(node->usn_nvl); } } static int zfs_do_userspace(int argc, char **argv) { zfs_handle_t *zhp; zfs_userquota_prop_t p; uu_avl_pool_t *avl_pool; uu_avl_t *avl_tree; uu_avl_walk_t *walk; char *cmd; boolean_t scripted = B_FALSE; boolean_t prtnum = B_FALSE; boolean_t parseable = B_FALSE; boolean_t sid2posix = B_FALSE; int error = 0; int c; zfs_sort_column_t *default_sortcol = NULL; zfs_sort_column_t *sortcol = NULL; unsigned types = USTYPE_PSX_USR | USTYPE_SMB_USR; unsigned fields = 0; unsigned props = USPROP_USED | USPROP_QUOTA; us_cbdata_t cb; us_node_t *node; boolean_t resort_avl = B_FALSE; if (argc < 2) usage(B_FALSE); cmd = argv[0]; if (0 == strcmp(cmd, "groupspace")) /* toggle default group types */ types = USTYPE_PSX_GRP | USTYPE_SMB_GRP; /* check options */ while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) { switch (c) { case 'n': prtnum = B_TRUE; break; case 'H': scripted = B_TRUE; break; case 'p': parseable = B_TRUE; break; case 'o': if (parsefields(&fields, us_field_names, us_field_bits, 4) != 0) return (1); break; case 's': if (zfs_add_sort_column(&sortcol, optarg, B_FALSE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 'S': if (zfs_add_sort_column(&sortcol, optarg, B_TRUE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 't': if (parsefields(&types, type_names, type_bits, 5)) return (1); break; case 'i': sid2posix = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* ok, now we have sorted by default colums (type,name) avl tree */ if (sortcol) { zfs_sort_column_t *sc; for (sc = sortcol; sc; sc = sc->sc_next) { if (sc->sc_prop == ZFS_PROP_QUOTA) { resort_avl = B_TRUE; break; } } } if (!fields) fields = USFIELD_ALL; if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL) return (1); if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t), offsetof(us_node_t, usn_avlnode), us_compare, UU_DEFAULT)) == NULL) nomem(); if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if (sortcol && !resort_avl) cb.cb_sortcol = sortcol; else { (void) zfs_add_sort_column(&default_sortcol, "type", B_FALSE); (void) zfs_add_sort_column(&default_sortcol, "name", B_FALSE); cb.cb_sortcol = default_sortcol; } cb.cb_numname = prtnum; cb.cb_nicenum = !parseable; cb.cb_avl_pool = avl_pool; cb.cb_avl = avl_tree; cb.cb_sid2posix = sid2posix; cb.cb_max_typelen = strlen(gettext("TYPE")); cb.cb_max_namelen = strlen(gettext("NAME")); cb.cb_max_usedlen = strlen(gettext("USED")); cb.cb_max_quotalen = strlen(gettext("QUOTA")); for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { if (!usprop_check(p, types, props)) continue; cb.cb_prop = p; error = zfs_userspace(zhp, p, userspace_cb, &cb); if (error) break; } if (resort_avl) { us_node_t *node; us_node_t *rmnode; uu_list_pool_t *listpool; uu_list_t *list; uu_avl_index_t idx = 0; uu_list_index_t idx2 = 0; listpool = uu_list_pool_create("tmplist", sizeof (us_node_t), offsetof(us_node_t, usn_listnode), NULL, UU_DEFAULT); list = uu_list_create(listpool, NULL, UU_DEFAULT); node = uu_avl_first(avl_tree); uu_list_node_init(node, &node->usn_listnode, listpool); while (node != NULL) { rmnode = node; node = uu_avl_next(avl_tree, node); uu_avl_remove(avl_tree, rmnode); if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) { uu_list_insert(list, rmnode, idx2); } } for (node = uu_list_first(list); node != NULL; node = uu_list_next(list, node)) { us_sort_info_t sortinfo = { sortcol, cb.cb_numname }; if (uu_avl_find(avl_tree, node, &sortinfo, &idx) == NULL) uu_avl_insert(avl_tree, node, idx); } uu_list_destroy(list); } /* print & free node`s nvlist memory */ print_us(scripted, parseable, fields, cb.cb_max_typelen, cb.cb_max_namelen, cb.cb_max_usedlen, cb.cb_max_quotalen, B_TRUE, cb.cb_avl); if (sortcol) zfs_free_sort_columns(sortcol); zfs_free_sort_columns(default_sortcol); /* * Finally, clean up the AVL tree. */ if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(cb.cb_avl, node); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(avl_tree); uu_avl_pool_destroy(avl_pool); return (error); } /* * list [-r][-d max] [-H] [-o property[,property]...] [-t type[,type]...] * [-s property [-s property]...] [-S property [-S property]...] * ... * * -r Recurse over all children * -d Limit recursion by depth. * -H Scripted mode; elide headers and separate columns by tabs * -o Control which fields to display. * -t Control which object types to display. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. * * When given no arguments, lists all filesystems in the system. * Otherwise, list the specified datasets, optionally recursing down them if * '-r' is specified. */ typedef struct list_cbdata { boolean_t cb_first; boolean_t cb_scripted; zprop_list_t *cb_proplist; } list_cbdata_t; /* * Given a list of columns to display, output appropriate headers for each one. */ static void print_header(zprop_list_t *pl) { char headerbuf[ZFS_MAXPROPLEN]; const char *header; int i; boolean_t first = B_TRUE; boolean_t right_justify; for (; pl != NULL; pl = pl->pl_next) { if (!first) { (void) printf(" "); } else { first = B_FALSE; } right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { header = zfs_prop_column_name(pl->pl_prop); right_justify = zfs_prop_align_right(pl->pl_prop); } else { for (i = 0; pl->pl_user_prop[i] != '\0'; i++) headerbuf[i] = toupper(pl->pl_user_prop[i]); headerbuf[i] = '\0'; header = headerbuf; } if (pl->pl_next == NULL && !right_justify) (void) printf("%s", header); else if (right_justify) (void) printf("%*s", (int)pl->pl_width, header); else (void) printf("%-*s", (int)pl->pl_width, header); } (void) printf("\n"); } /* * Given a dataset and a list of fields, print out all the properties according * to the described layout. */ static void print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) { boolean_t first = B_TRUE; char property[ZFS_MAXPROPLEN]; nvlist_t *userprops = zfs_get_user_props(zhp); nvlist_t *propval; char *propstr; boolean_t right_justify; int width; for (; pl != NULL; pl = pl->pl_next) { if (!first) { if (scripted) (void) printf("\t"); else (void) printf(" "); } else { first = B_FALSE; } if (pl->pl_prop == ZFS_PROP_NAME) { (void) strlcpy(property, zfs_get_name(zhp), sizeof(property)); propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (pl->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, pl->pl_prop, property, sizeof (property), NULL, NULL, 0, B_FALSE) != 0) propstr = "-"; else propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (zfs_prop_userquota(pl->pl_user_prop)) { if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, property, sizeof (property), B_FALSE) != 0) propstr = "-"; else propstr = property; right_justify = B_TRUE; + } else if (zfs_prop_written(pl->pl_user_prop)) { + if (zfs_prop_get_written(zhp, pl->pl_user_prop, + property, sizeof (property), B_FALSE) != 0) + propstr = "-"; + else + propstr = property; + right_justify = B_TRUE; } else { if (nvlist_lookup_nvlist(userprops, pl->pl_user_prop, &propval) != 0) propstr = "-"; else verify(nvlist_lookup_string(propval, ZPROP_VALUE, &propstr) == 0); right_justify = B_FALSE; } width = pl->pl_width; /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. */ if (scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) (void) printf("%*s", width, propstr); else (void) printf("%-*s", width, propstr); } (void) printf("\n"); } /* * Generic callback function to list a dataset or snapshot. */ static int list_callback(zfs_handle_t *zhp, void *data) { list_cbdata_t *cbp = data; if (cbp->cb_first) { if (!cbp->cb_scripted) print_header(cbp->cb_proplist); cbp->cb_first = B_FALSE; } print_dataset(zhp, cbp->cb_proplist, cbp->cb_scripted); return (0); } static int zfs_do_list(int argc, char **argv) { int c; boolean_t scripted = B_FALSE; static char default_fields[] = "name,used,available,referenced,mountpoint"; int types = ZFS_TYPE_DATASET; boolean_t types_specified = B_FALSE; char *fields = NULL; list_cbdata_t cb = { 0 }; char *value; int limit = 0; int ret = 0; zfs_sort_column_t *sortcol = NULL; int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ while ((c = getopt(argc, argv, ":d:o:rt:Hs:S:")) != -1) { switch (c) { case 'o': fields = optarg; break; case 'd': limit = parse_depth(optarg, &flags); break; case 'r': flags |= ZFS_ITER_RECURSE; break; case 'H': scripted = B_TRUE; break; case 's': if (zfs_add_sort_column(&sortcol, optarg, B_FALSE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 'S': if (zfs_add_sort_column(&sortcol, optarg, B_TRUE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 't': types = 0; types_specified = B_TRUE; flags &= ~ZFS_ITER_PROP_LISTSNAPS; while (*optarg != '\0') { static char *type_subopts[] = { "filesystem", "volume", "snapshot", "snap", "all", NULL }; switch (getsubopt(&optarg, type_subopts, &value)) { case 0: types |= ZFS_TYPE_FILESYSTEM; break; case 1: types |= ZFS_TYPE_VOLUME; break; case 2: case 3: types |= ZFS_TYPE_SNAPSHOT; break; case 4: types = ZFS_TYPE_DATASET; break; default: (void) fprintf(stderr, gettext("invalid type '%s'\n"), value); usage(B_FALSE); } } break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (fields == NULL) fields = default_fields; /* * If we are only going to list snapshot names and sort by name, * then we can use faster version. */ if (strcmp(fields, "name") == 0 && zfs_sort_only_by_name(sortcol)) flags |= ZFS_ITER_SIMPLE; /* * If "-o space" and no types were specified, don't display snapshots. */ if (strcmp(fields, "space") == 0 && types_specified == B_FALSE) types &= ~ZFS_TYPE_SNAPSHOT; /* * If the user specifies '-o all', the zprop_get_list() doesn't * normally include the name of the dataset. For 'zfs list', we always * want this property to be first. */ if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) != 0) usage(B_FALSE); cb.cb_scripted = scripted; cb.cb_first = B_TRUE; ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, limit, list_callback, &cb); zprop_free_list(cb.cb_proplist); zfs_free_sort_columns(sortcol); if (ret == 0 && cb.cb_first && !cb.cb_scripted) (void) fprintf(stderr, gettext("no datasets available\n")); return (ret); } /* * zfs rename * zfs rename -p * zfs rename -r * * Renames the given dataset to another of the same type. * * The '-p' flag creates all the non-existing ancestors of the target first. */ /* ARGSUSED */ static int zfs_do_rename(int argc, char **argv) { zfs_handle_t *zhp; int c; int ret = 0; boolean_t recurse = B_FALSE; boolean_t parents = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "pr")) != -1) { switch (c) { case 'p': parents = B_TRUE; break; case 'r': recurse = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (recurse && parents) { (void) fprintf(stderr, gettext("-p and -r options are mutually " "exclusive\n")); usage(B_FALSE); } if (recurse && strchr(argv[0], '@') == 0) { (void) fprintf(stderr, gettext("source dataset for recursive " "rename must be a snapshot\n")); usage(B_FALSE); } if ((zhp = zfs_open(g_zfs, argv[0], parents ? ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME : ZFS_TYPE_DATASET)) == NULL) return (1); /* If we were asked and the name looks good, try to create ancestors. */ if (parents && zfs_name_valid(argv[1], zfs_get_type(zhp)) && zfs_create_ancestors(g_zfs, argv[1]) != 0) { zfs_close(zhp); return (1); } ret = (zfs_rename(zhp, argv[1], recurse) != 0); zfs_close(zhp); return (ret); } /* * zfs promote * * Promotes the given clone fs to be the parent */ /* ARGSUSED */ static int zfs_do_promote(int argc, char **argv) { zfs_handle_t *zhp; int ret = 0; /* check options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing clone filesystem" " argument\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); ret = (zfs_promote(zhp) != 0); zfs_close(zhp); return (ret); } /* * zfs rollback [-rRf] * * -r Delete any intervening snapshots before doing rollback * -R Delete any snapshots and their clones * -f ignored for backwards compatability * * Given a filesystem, rollback to a specific snapshot, discarding any changes * since then and making it the active dataset. If more recent snapshots exist, * the command will complain unless the '-r' flag is given. */ typedef struct rollback_cbdata { uint64_t cb_create; boolean_t cb_first; int cb_doclones; char *cb_target; int cb_error; boolean_t cb_recurse; boolean_t cb_dependent; } rollback_cbdata_t; /* * Report any snapshots more recent than the one specified. Used when '-r' is * not specified. We reuse this same callback for the snapshot dependents - if * 'cb_dependent' is set, then this is a dependent and we should report it * without checking the transaction group. */ static int rollback_check(zfs_handle_t *zhp, void *data) { rollback_cbdata_t *cbp = data; if (cbp->cb_doclones) { zfs_close(zhp); return (0); } if (!cbp->cb_dependent) { if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 && zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { if (cbp->cb_first && !cbp->cb_recurse) { (void) fprintf(stderr, gettext("cannot " "rollback to '%s': more recent snapshots " "exist\n"), cbp->cb_target); (void) fprintf(stderr, gettext("use '-r' to " "force deletion of the following " "snapshots:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } if (cbp->cb_recurse) { cbp->cb_dependent = B_TRUE; if (zfs_iter_dependents(zhp, B_TRUE, rollback_check, cbp) != 0) { zfs_close(zhp); return (-1); } cbp->cb_dependent = B_FALSE; } else { (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } } } else { if (cbp->cb_first && cbp->cb_recurse) { (void) fprintf(stderr, gettext("cannot rollback to " "'%s': clones of previous snapshots exist\n"), cbp->cb_target); (void) fprintf(stderr, gettext("use '-R' to " "force deletion of the following clones and " "dependents:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } zfs_close(zhp); return (0); } static int zfs_do_rollback(int argc, char **argv) { int ret = 0; int c; boolean_t force = B_FALSE; rollback_cbdata_t cb = { 0 }; zfs_handle_t *zhp, *snap; char parentname[ZFS_MAXNAMELEN]; char *delim; /* check options */ while ((c = getopt(argc, argv, "rRf")) != -1) { switch (c) { case 'r': cb.cb_recurse = 1; break; case 'R': cb.cb_recurse = 1; cb.cb_doclones = 1; break; case 'f': force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* open the snapshot */ if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); /* open the parent dataset */ (void) strlcpy(parentname, argv[0], sizeof (parentname)); verify((delim = strrchr(parentname, '@')) != NULL); *delim = '\0'; if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_DATASET)) == NULL) { zfs_close(snap); return (1); } /* * Check for more recent snapshots and/or clones based on the presence * of '-r' and '-R'. */ cb.cb_target = argv[0]; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); cb.cb_first = B_TRUE; cb.cb_error = 0; if ((ret = zfs_iter_children(zhp, rollback_check, &cb)) != 0) goto out; if ((ret = cb.cb_error) != 0) goto out; /* * Rollback parent to the given snapshot. */ ret = zfs_rollback(zhp, snap, force); out: zfs_close(snap); zfs_close(zhp); if (ret == 0) return (0); else return (1); } /* * zfs set property=value { fs | snap | vol } ... * * Sets the given property for all datasets specified on the command line. */ typedef struct set_cbdata { char *cb_propname; char *cb_value; } set_cbdata_t; static int set_callback(zfs_handle_t *zhp, void *data) { set_cbdata_t *cbp = data; if (zfs_prop_set(zhp, cbp->cb_propname, cbp->cb_value) != 0) { switch (libzfs_errno(g_zfs)) { case EZFS_MOUNTFAILED: (void) fprintf(stderr, gettext("property may be set " "but unable to remount filesystem\n")); break; case EZFS_SHARENFSFAILED: (void) fprintf(stderr, gettext("property may be set " "but unable to reshare filesystem\n")); break; } return (1); } return (0); } static int zfs_do_set(int argc, char **argv) { set_cbdata_t cb; int ret = 0; /* check for options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing property=value " "argument\n")); usage(B_FALSE); } if (argc < 3) { (void) fprintf(stderr, gettext("missing dataset name\n")); usage(B_FALSE); } /* validate property=value argument */ cb.cb_propname = argv[1]; if (((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) || (cb.cb_value[1] == '\0')) { (void) fprintf(stderr, gettext("missing value in " "property=value argument\n")); usage(B_FALSE); } *cb.cb_value = '\0'; cb.cb_value++; if (*cb.cb_propname == '\0') { (void) fprintf(stderr, gettext("missing property in property=value argument\n")); usage(B_FALSE); } ret = zfs_for_each(argc - 2, argv + 2, 0, ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, &cb); return (ret); } /* * zfs snapshot [-r] [-o prop=value] ... * * Creates a snapshot with the given name. While functionally equivalent to * 'zfs create', it is a separate command to differentiate intent. */ static int zfs_do_snapshot(int argc, char **argv) { boolean_t recursive = B_FALSE; int ret = 0; signed char c; nvlist_t *props; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, "ro:")) != -1) { switch (c) { case 'o': if (parseprop(props)) return (1); break; case 'r': recursive = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); goto usage; } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); goto usage; } ret = zfs_snapshot(g_zfs, argv[0], recursive, props); nvlist_free(props); if (ret && recursive) (void) fprintf(stderr, gettext("no snapshots were created\n")); return (ret != 0); usage: nvlist_free(props); usage(B_FALSE); return (-1); } /* - * zfs send [-vDp] -R [-i|-I <@snap>] - * zfs send [-vDp] [-i|-I <@snap>] - * * Send a backup stream to stdout. */ static int zfs_do_send(int argc, char **argv) { char *fromname = NULL; char *toname = NULL; char *cp; zfs_handle_t *zhp; sendflags_t flags = { 0 }; int c, err; - nvlist_t *dbgnv; + nvlist_t *dbgnv = NULL; boolean_t extraverbose = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":i:I:RDpv")) != -1) { + while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) { switch (c) { case 'i': if (fromname) usage(B_FALSE); fromname = optarg; break; case 'I': if (fromname) usage(B_FALSE); fromname = optarg; flags.doall = B_TRUE; break; case 'R': flags.replicate = B_TRUE; break; case 'p': flags.props = B_TRUE; break; + case 'P': + flags.parsable = B_TRUE; + flags.verbose = B_TRUE; + break; case 'v': if (flags.verbose) extraverbose = B_TRUE; flags.verbose = B_TRUE; break; case 'D': flags.dedup = B_TRUE; break; + case 'n': + flags.dryrun = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } - if (isatty(STDOUT_FILENO)) { + if (!flags.dryrun && isatty(STDOUT_FILENO)) { (void) fprintf(stderr, gettext("Error: Stream can not be written to a terminal.\n" "You must redirect standard output.\n")); return (1); } cp = strchr(argv[0], '@'); if (cp == NULL) { (void) fprintf(stderr, gettext("argument must be a snapshot\n")); usage(B_FALSE); } *cp = '\0'; toname = cp + 1; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); /* * If they specified the full path to the snapshot, chop off * everything except the short name of the snapshot, but special * case if they specify the origin. */ if (fromname && (cp = strchr(fromname, '@')) != NULL) { char origin[ZFS_MAXNAMELEN]; zprop_source_t src; (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), &src, NULL, 0, B_FALSE); if (strcmp(origin, fromname) == 0) { fromname = NULL; flags.fromorigin = B_TRUE; } else { *cp = '\0'; if (cp != fromname && strcmp(argv[0], fromname)) { (void) fprintf(stderr, gettext("incremental source must be " "in same filesystem\n")); usage(B_FALSE); } fromname = cp + 1; if (strchr(fromname, '@') || strchr(fromname, '/')) { (void) fprintf(stderr, gettext("invalid incremental source\n")); usage(B_FALSE); } } } if (flags.replicate && fromname == NULL) flags.doall = B_TRUE; - err = zfs_send(zhp, fromname, toname, flags, STDOUT_FILENO, NULL, 0, + err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, NULL, 0, extraverbose ? &dbgnv : NULL); - if (extraverbose) { + if (extraverbose && dbgnv != NULL) { /* * dump_nvlist prints to stdout, but that's been * redirected to a file. Make it print to stderr * instead. */ (void) dup2(STDERR_FILENO, STDOUT_FILENO); dump_nvlist(dbgnv, 0); nvlist_free(dbgnv); } zfs_close(zhp); return (err != 0); } /* * zfs receive [-vnFu] [-d | -e] * * Restore a backup stream from stdin. */ static int zfs_do_receive(int argc, char **argv) { int c, err; recvflags_t flags = { 0 }; /* check options */ while ((c = getopt(argc, argv, ":denuvF")) != -1) { switch (c) { case 'd': flags.isprefix = B_TRUE; break; case 'e': flags.isprefix = B_TRUE; flags.istail = B_TRUE; break; case 'n': flags.dryrun = B_TRUE; break; case 'u': flags.nomount = B_TRUE; break; case 'v': flags.verbose = B_TRUE; break; case 'F': flags.force = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " "from a terminal.\n" "You must redirect standard input.\n")); return (1); } - err = zfs_receive(g_zfs, argv[0], flags, STDIN_FILENO, NULL); + err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL); return (err != 0); } /* * allow/unallow stuff */ /* copied from zfs/sys/dsl_deleg.h */ #define ZFS_DELEG_PERM_CREATE "create" #define ZFS_DELEG_PERM_DESTROY "destroy" #define ZFS_DELEG_PERM_SNAPSHOT "snapshot" #define ZFS_DELEG_PERM_ROLLBACK "rollback" #define ZFS_DELEG_PERM_CLONE "clone" #define ZFS_DELEG_PERM_PROMOTE "promote" #define ZFS_DELEG_PERM_RENAME "rename" #define ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_ALLOW "allow" #define ZFS_DELEG_PERM_USERPROP "userprop" #define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? */ #define ZFS_DELEG_PERM_USERQUOTA "userquota" #define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" #define ZFS_DELEG_PERM_USERUSED "userused" #define ZFS_DELEG_PERM_GROUPUSED "groupused" #define ZFS_DELEG_PERM_HOLD "hold" #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW }, { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, { NULL, ZFS_DELEG_NOTE_NONE } }; /* permission structure */ typedef struct deleg_perm { zfs_deleg_who_type_t dp_who_type; const char *dp_name; boolean_t dp_local; boolean_t dp_descend; } deleg_perm_t; /* */ typedef struct deleg_perm_node { deleg_perm_t dpn_perm; uu_avl_node_t dpn_avl_node; } deleg_perm_node_t; typedef struct fs_perm fs_perm_t; /* permissions set */ typedef struct who_perm { zfs_deleg_who_type_t who_type; const char *who_name; /* id */ char who_ug_name[256]; /* user/group name */ fs_perm_t *who_fsperm; /* uplink */ uu_avl_t *who_deleg_perm_avl; /* permissions */ } who_perm_t; /* */ typedef struct who_perm_node { who_perm_t who_perm; uu_avl_node_t who_avl_node; } who_perm_node_t; typedef struct fs_perm_set fs_perm_set_t; /* fs permissions */ struct fs_perm { const char *fsp_name; uu_avl_t *fsp_sc_avl; /* sets,create */ uu_avl_t *fsp_uge_avl; /* user,group,everyone */ fs_perm_set_t *fsp_set; /* uplink */ }; /* */ typedef struct fs_perm_node { fs_perm_t fspn_fsperm; uu_avl_t *fspn_avl; uu_list_node_t fspn_list_node; } fs_perm_node_t; /* top level structure */ struct fs_perm_set { uu_list_pool_t *fsps_list_pool; uu_list_t *fsps_list; /* list of fs_perms */ uu_avl_pool_t *fsps_named_set_avl_pool; uu_avl_pool_t *fsps_who_perm_avl_pool; uu_avl_pool_t *fsps_deleg_perm_avl_pool; }; static inline const char * deleg_perm_type(zfs_deleg_note_t note) { /* subcommands */ switch (note) { /* SUBCOMMANDS */ /* OTHER */ case ZFS_DELEG_NOTE_GROUPQUOTA: case ZFS_DELEG_NOTE_GROUPUSED: case ZFS_DELEG_NOTE_USERPROP: case ZFS_DELEG_NOTE_USERQUOTA: case ZFS_DELEG_NOTE_USERUSED: /* other */ return (gettext("other")); default: return (gettext("subcommand")); } } static int inline who_type2weight(zfs_deleg_who_type_t who_type) { int res; switch (who_type) { case ZFS_DELEG_NAMED_SET_SETS: case ZFS_DELEG_NAMED_SET: res = 0; break; case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_CREATE: res = 1; break; case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: res = 2; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: res = 3; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: res = 4; break; default: res = -1; } return (res); } /* ARGSUSED */ static int who_perm_compare(const void *larg, const void *rarg, void *unused) { const who_perm_node_t *l = larg; const who_perm_node_t *r = rarg; zfs_deleg_who_type_t ltype = l->who_perm.who_type; zfs_deleg_who_type_t rtype = r->who_perm.who_type; int lweight = who_type2weight(ltype); int rweight = who_type2weight(rtype); int res = lweight - rweight; if (res == 0) res = strncmp(l->who_perm.who_name, r->who_perm.who_name, ZFS_MAX_DELEG_NAME-1); if (res == 0) return (0); if (res > 0) return (1); else return (-1); } /* ARGSUSED */ static int deleg_perm_compare(const void *larg, const void *rarg, void *unused) { const deleg_perm_node_t *l = larg; const deleg_perm_node_t *r = rarg; int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name, ZFS_MAX_DELEG_NAME-1); if (res == 0) return (0); if (res > 0) return (1); else return (-1); } static inline void fs_perm_set_init(fs_perm_set_t *fspset) { bzero(fspset, sizeof (fs_perm_set_t)); if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool", sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node), NULL, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create( "named_set_avl_pool", sizeof (who_perm_node_t), offsetof( who_perm_node_t, who_avl_node), who_perm_compare, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create( "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof( who_perm_node_t, who_avl_node), who_perm_compare, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create( "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof( deleg_perm_node_t, dpn_avl_node), deleg_perm_compare, UU_DEFAULT)) == NULL) nomem(); } static inline void fs_perm_fini(fs_perm_t *); static inline void who_perm_fini(who_perm_t *); static inline void fs_perm_set_fini(fs_perm_set_t *fspset) { fs_perm_node_t *node = uu_list_first(fspset->fsps_list); while (node != NULL) { fs_perm_node_t *next_node = uu_list_next(fspset->fsps_list, node); fs_perm_t *fsperm = &node->fspn_fsperm; fs_perm_fini(fsperm); uu_list_remove(fspset->fsps_list, node); free(node); node = next_node; } uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool); uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool); uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool); } static inline void deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type, const char *name) { deleg_perm->dp_who_type = type; deleg_perm->dp_name = name; } static inline void who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm, zfs_deleg_who_type_t type, const char *name) { uu_avl_pool_t *pool; pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool; bzero(who_perm, sizeof (who_perm_t)); if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) nomem(); who_perm->who_type = type; who_perm->who_name = name; who_perm->who_fsperm = fsperm; } static inline void who_perm_fini(who_perm_t *who_perm) { deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl); while (node != NULL) { deleg_perm_node_t *next_node = uu_avl_next(who_perm->who_deleg_perm_avl, node); uu_avl_remove(who_perm->who_deleg_perm_avl, node); free(node); node = next_node; } uu_avl_destroy(who_perm->who_deleg_perm_avl); } static inline void fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname) { uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool; uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool; bzero(fsperm, sizeof (fs_perm_t)); if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if ((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT)) == NULL) nomem(); fsperm->fsp_set = fspset; fsperm->fsp_name = fsname; } static inline void fs_perm_fini(fs_perm_t *fsperm) { who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl); while (node != NULL) { who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl, node); who_perm_t *who_perm = &node->who_perm; who_perm_fini(who_perm); uu_avl_remove(fsperm->fsp_sc_avl, node); free(node); node = next_node; } node = uu_avl_first(fsperm->fsp_uge_avl); while (node != NULL) { who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl, node); who_perm_t *who_perm = &node->who_perm; who_perm_fini(who_perm); uu_avl_remove(fsperm->fsp_uge_avl, node); free(node); node = next_node; } uu_avl_destroy(fsperm->fsp_sc_avl); uu_avl_destroy(fsperm->fsp_uge_avl); } static void inline set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node, zfs_deleg_who_type_t who_type, const char *name, char locality) { uu_avl_index_t idx = 0; deleg_perm_node_t *found_node = NULL; deleg_perm_t *deleg_perm = &node->dpn_perm; deleg_perm_init(deleg_perm, who_type, name); if ((found_node = uu_avl_find(avl, node, NULL, &idx)) == NULL) uu_avl_insert(avl, node, idx); else { node = found_node; deleg_perm = &node->dpn_perm; } switch (locality) { case ZFS_DELEG_LOCAL: deleg_perm->dp_local = B_TRUE; break; case ZFS_DELEG_DESCENDENT: deleg_perm->dp_descend = B_TRUE; break; case ZFS_DELEG_NA: break; default: assert(B_FALSE); /* invalid locality */ } } static inline int parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality) { nvpair_t *nvp = NULL; fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set; uu_avl_t *avl = who_perm->who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_perm->who_type; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *name = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool; deleg_perm_node_t *node = safe_malloc(sizeof (deleg_perm_node_t)); VERIFY(type == DATA_TYPE_BOOLEAN); uu_avl_node_init(node, &node->dpn_avl_node, avl_pool); set_deleg_perm_node(avl, node, who_type, name, locality); } return (0); } static inline int parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) { nvpair_t *nvp = NULL; fs_perm_set_t *fspset = fsperm->fsp_set; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { nvlist_t *nvl2 = NULL; const char *name = nvpair_name(nvp); uu_avl_t *avl = NULL; uu_avl_pool_t *avl_pool = NULL; zfs_deleg_who_type_t perm_type = name[0]; char perm_locality = name[1]; const char *perm_name = name + 3; boolean_t is_set = B_TRUE; who_perm_t *who_perm = NULL; assert('$' == name[2]); if (nvpair_value_nvlist(nvp, &nvl2) != 0) return (-1); switch (perm_type) { case ZFS_DELEG_CREATE: case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_NAMED_SET: case ZFS_DELEG_NAMED_SET_SETS: avl_pool = fspset->fsps_named_set_avl_pool; avl = fsperm->fsp_sc_avl; break; case ZFS_DELEG_USER: case ZFS_DELEG_USER_SETS: case ZFS_DELEG_GROUP: case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_EVERYONE: case ZFS_DELEG_EVERYONE_SETS: avl_pool = fspset->fsps_who_perm_avl_pool; avl = fsperm->fsp_uge_avl; break; default: break; } if (is_set) { who_perm_node_t *found_node = NULL; who_perm_node_t *node = safe_malloc( sizeof (who_perm_node_t)); who_perm = &node->who_perm; uu_avl_index_t idx = 0; uu_avl_node_init(node, &node->who_avl_node, avl_pool); who_perm_init(who_perm, fsperm, perm_type, perm_name); if ((found_node = uu_avl_find(avl, node, NULL, &idx)) == NULL) { if (avl == fsperm->fsp_uge_avl) { uid_t rid = 0; struct passwd *p = NULL; struct group *g = NULL; const char *nice_name = NULL; switch (perm_type) { case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: rid = atoi(perm_name); p = getpwuid(rid); if (p) nice_name = p->pw_name; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: rid = atoi(perm_name); g = getgrgid(rid); if (g) nice_name = g->gr_name; break; default: break; } if (nice_name != NULL) (void) strlcpy( node->who_perm.who_ug_name, nice_name, 256); } uu_avl_insert(avl, node, idx); } else { node = found_node; who_perm = &node->who_perm; } } (void) parse_who_perm(who_perm, nvl2, perm_locality); } return (0); } static inline int parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) { nvpair_t *nvp = NULL; uu_avl_index_t idx = 0; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { nvlist_t *nvl2 = NULL; const char *fsname = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); fs_perm_t *fsperm = NULL; fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t)); if (node == NULL) nomem(); fsperm = &node->fspn_fsperm; VERIFY(DATA_TYPE_NVLIST == type); uu_list_node_init(node, &node->fspn_list_node, fspset->fsps_list_pool); idx = uu_list_numnodes(fspset->fsps_list); fs_perm_init(fsperm, fspset, fsname); if (nvpair_value_nvlist(nvp, &nvl2) != 0) return (-1); (void) parse_fs_perm(fsperm, nvl2); uu_list_insert(fspset->fsps_list, node, idx); } return (0); } static inline const char * deleg_perm_comment(zfs_deleg_note_t note) { const char *str = ""; /* subcommands */ switch (note) { /* SUBCOMMANDS */ case ZFS_DELEG_NOTE_ALLOW: str = gettext("Must also have the permission that is being" "\n\t\t\t\tallowed"); break; case ZFS_DELEG_NOTE_CLONE: str = gettext("Must also have the 'create' ability and 'mount'" "\n\t\t\t\tability in the origin file system"); break; case ZFS_DELEG_NOTE_CREATE: str = gettext("Must also have the 'mount' ability"); break; case ZFS_DELEG_NOTE_DESTROY: str = gettext("Must also have the 'mount' ability"); break; case ZFS_DELEG_NOTE_DIFF: str = gettext("Allows lookup of paths within a dataset;" "\n\t\t\t\tgiven an object number. Ordinary users need this" "\n\t\t\t\tin order to use zfs diff"); break; case ZFS_DELEG_NOTE_HOLD: str = gettext("Allows adding a user hold to a snapshot"); break; case ZFS_DELEG_NOTE_MOUNT: str = gettext("Allows mount/umount of ZFS datasets"); break; case ZFS_DELEG_NOTE_PROMOTE: str = gettext("Must also have the 'mount'\n\t\t\t\tand" " 'promote' ability in the origin file system"); break; case ZFS_DELEG_NOTE_RECEIVE: str = gettext("Must also have the 'mount' and 'create'" " ability"); break; case ZFS_DELEG_NOTE_RELEASE: str = gettext("Allows releasing a user hold which\n\t\t\t\t" "might destroy the snapshot"); break; case ZFS_DELEG_NOTE_RENAME: str = gettext("Must also have the 'mount' and 'create'" "\n\t\t\t\tability in the new parent"); break; case ZFS_DELEG_NOTE_ROLLBACK: str = gettext(""); break; case ZFS_DELEG_NOTE_SEND: str = gettext(""); break; case ZFS_DELEG_NOTE_SHARE: str = gettext("Allows sharing file systems over NFS or SMB" "\n\t\t\t\tprotocols"); break; case ZFS_DELEG_NOTE_SNAPSHOT: str = gettext(""); break; /* * case ZFS_DELEG_NOTE_VSCAN: * str = gettext(""); * break; */ /* OTHER */ case ZFS_DELEG_NOTE_GROUPQUOTA: str = gettext("Allows accessing any groupquota@... property"); break; case ZFS_DELEG_NOTE_GROUPUSED: str = gettext("Allows reading any groupused@... property"); break; case ZFS_DELEG_NOTE_USERPROP: str = gettext("Allows changing any user property"); break; case ZFS_DELEG_NOTE_USERQUOTA: str = gettext("Allows accessing any userquota@... property"); break; case ZFS_DELEG_NOTE_USERUSED: str = gettext("Allows reading any userused@... property"); break; /* other */ default: str = ""; } return (str); } struct allow_opts { boolean_t local; boolean_t descend; boolean_t user; boolean_t group; boolean_t everyone; boolean_t create; boolean_t set; boolean_t recursive; /* unallow only */ boolean_t prt_usage; boolean_t prt_perms; char *who; char *perms; const char *dataset; }; static inline int prop_cmp(const void *a, const void *b) { const char *str1 = *(const char **)a; const char *str2 = *(const char **)b; return (strcmp(str1, str2)); } static void allow_usage(boolean_t un, boolean_t requested, const char *msg) { const char *opt_desc[] = { "-h", gettext("show this help message and exit"), "-l", gettext("set permission locally"), "-d", gettext("set permission for descents"), "-u", gettext("set permission for user"), "-g", gettext("set permission for group"), "-e", gettext("set permission for everyone"), "-c", gettext("set create time permission"), "-s", gettext("define permission set"), /* unallow only */ "-r", gettext("remove permissions recursively"), }; size_t unallow_size = sizeof (opt_desc) / sizeof (char *); size_t allow_size = unallow_size - 2; const char *props[ZFS_NUM_PROPS]; int i; size_t count = 0; FILE *fp = requested ? stdout : stderr; zprop_desc_t *pdtbl = zfs_prop_get_table(); const char *fmt = gettext("%-16s %-14s\t%s\n"); (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW : HELP_ALLOW)); (void) fprintf(fp, gettext("Options:\n")); for (i = 0; i < (un ? unallow_size : allow_size); i++) { const char *opt = opt_desc[i++]; const char *optdsc = opt_desc[i]; (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc); } (void) fprintf(fp, gettext("\nThe following permissions are " "supported:\n\n")); (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"), gettext("NOTES")); for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) { const char *perm_name = zfs_deleg_perm_tbl[i].z_perm; zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note; const char *perm_type = deleg_perm_type(perm_note); const char *perm_comment = deleg_perm_comment(perm_note); (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment); } for (i = 0; i < ZFS_NUM_PROPS; i++) { zprop_desc_t *pd = &pdtbl[i]; if (pd->pd_visible != B_TRUE) continue; if (pd->pd_attr == PROP_READONLY) continue; props[count++] = pd->pd_name; } props[count] = NULL; qsort(props, count, sizeof (char *), prop_cmp); for (i = 0; i < count; i++) (void) fprintf(fp, fmt, props[i], gettext("property"), ""); if (msg != NULL) (void) fprintf(fp, gettext("\nzfs: error: %s"), msg); exit(requested ? 0 : 2); } static inline const char * munge_args(int argc, char **argv, boolean_t un, size_t expected_argc, char **permsp) { if (un && argc == expected_argc - 1) *permsp = NULL; else if (argc == expected_argc) *permsp = argv[argc - 2]; else allow_usage(un, B_FALSE, gettext("wrong number of parameters\n")); return (argv[argc - 1]); } static void parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts) { int uge_sum = opts->user + opts->group + opts->everyone; int csuge_sum = opts->create + opts->set + uge_sum; int ldcsuge_sum = csuge_sum + opts->local + opts->descend; int all_sum = un ? ldcsuge_sum + opts->recursive : ldcsuge_sum; if (uge_sum > 1) allow_usage(un, B_FALSE, gettext("-u, -g, and -e are mutually exclusive\n")); if (opts->prt_usage) { if (argc == 0 && all_sum == 0) allow_usage(un, B_TRUE, NULL); else usage(B_FALSE); } if (opts->set) { if (csuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -s\n")); opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); if (argv[0][0] != '@') allow_usage(un, B_FALSE, gettext("invalid set name: missing '@' prefix\n")); opts->who = argv[0]; } else if (opts->create) { if (ldcsuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -c\n")); opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (opts->everyone) { if (csuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -e\n")); opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone") == 0) { opts->everyone = B_TRUE; argc--; argv++; opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (argc == 1) { opts->prt_perms = B_TRUE; opts->dataset = argv[argc-1]; } else { opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); opts->who = argv[0]; } if (!opts->local && !opts->descend) { opts->local = B_TRUE; opts->descend = B_TRUE; } } static void store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, const char *who, char *perms, nvlist_t *top_nvl) { int i; char ld[2] = { '\0', '\0' }; char who_buf[ZFS_MAXNAMELEN+32]; char base_type = ZFS_DELEG_WHO_UNKNOWN; char set_type = ZFS_DELEG_WHO_UNKNOWN; nvlist_t *base_nvl = NULL; nvlist_t *set_nvl = NULL; nvlist_t *nvl; if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); switch (type) { case ZFS_DELEG_NAMED_SET_SETS: case ZFS_DELEG_NAMED_SET: set_type = ZFS_DELEG_NAMED_SET_SETS; base_type = ZFS_DELEG_NAMED_SET; ld[0] = ZFS_DELEG_NA; break; case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_CREATE: set_type = ZFS_DELEG_CREATE_SETS; base_type = ZFS_DELEG_CREATE; ld[0] = ZFS_DELEG_NA; break; case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: set_type = ZFS_DELEG_USER_SETS; base_type = ZFS_DELEG_USER; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: set_type = ZFS_DELEG_GROUP_SETS; base_type = ZFS_DELEG_GROUP; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: set_type = ZFS_DELEG_EVERYONE_SETS; base_type = ZFS_DELEG_EVERYONE; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; default: break; } if (perms != NULL) { char *curr = perms; char *end = curr + strlen(perms); while (curr < end) { char *delim = strchr(curr, ','); if (delim == NULL) delim = end; else *delim = '\0'; if (curr[0] == '@') nvl = set_nvl; else nvl = base_nvl; (void) nvlist_add_boolean(nvl, curr); if (delim != end) *delim = ','; curr = delim + 1; } for (i = 0; i < 2; i++) { char locality = ld[i]; if (locality == 0) continue; if (!nvlist_empty(base_nvl)) { if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", base_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", base_type, locality); (void) nvlist_add_nvlist(top_nvl, who_buf, base_nvl); } if (!nvlist_empty(set_nvl)) { if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", set_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", set_type, locality); (void) nvlist_add_nvlist(top_nvl, who_buf, set_nvl); } } } else { for (i = 0; i < 2; i++) { char locality = ld[i]; if (locality == 0) continue; if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", base_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", base_type, locality); (void) nvlist_add_boolean(top_nvl, who_buf); if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", set_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", set_type, locality); (void) nvlist_add_boolean(top_nvl, who_buf); } } } static int construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp) { if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0) nomem(); if (opts->set) { store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local, opts->descend, opts->who, opts->perms, *nvlp); } else if (opts->create) { store_allow_perm(ZFS_DELEG_CREATE, opts->local, opts->descend, NULL, opts->perms, *nvlp); } else if (opts->everyone) { store_allow_perm(ZFS_DELEG_EVERYONE, opts->local, opts->descend, NULL, opts->perms, *nvlp); } else { char *curr = opts->who; char *end = curr + strlen(curr); while (curr < end) { const char *who; zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN; char *endch; char *delim = strchr(curr, ','); char errbuf[256]; char id[64]; struct passwd *p = NULL; struct group *g = NULL; uid_t rid; if (delim == NULL) delim = end; else *delim = '\0'; rid = (uid_t)strtol(curr, &endch, 0); if (opts->user) { who_type = ZFS_DELEG_USER; if (*endch != '\0') p = getpwnam(curr); else p = getpwuid(rid); if (p != NULL) rid = p->pw_uid; else { (void) snprintf(errbuf, 256, gettext( "invalid user %s"), curr); allow_usage(un, B_TRUE, errbuf); } } else if (opts->group) { who_type = ZFS_DELEG_GROUP; if (*endch != '\0') g = getgrnam(curr); else g = getgrgid(rid); if (g != NULL) rid = g->gr_gid; else { (void) snprintf(errbuf, 256, gettext( "invalid group %s"), curr); allow_usage(un, B_TRUE, errbuf); } } else { if (*endch != '\0') { p = getpwnam(curr); } else { p = getpwuid(rid); } if (p == NULL) { if (*endch != '\0') { g = getgrnam(curr); } else { g = getgrgid(rid); } } if (p != NULL) { who_type = ZFS_DELEG_USER; rid = p->pw_uid; } else if (g != NULL) { who_type = ZFS_DELEG_GROUP; rid = g->gr_gid; } else { (void) snprintf(errbuf, 256, gettext( "invalid user/group %s"), curr); allow_usage(un, B_TRUE, errbuf); } } (void) sprintf(id, "%u", rid); who = id; store_allow_perm(who_type, opts->local, opts->descend, who, opts->perms, *nvlp); curr = delim + 1; } } return (0); } static void print_set_creat_perms(uu_avl_t *who_avl) { const char *sc_title[] = { gettext("Permission sets:\n"), gettext("Create time permissions:\n"), NULL }; const char **title_ptr = sc_title; who_perm_node_t *who_node = NULL; int prev_weight = -1; for (who_node = uu_avl_first(who_avl); who_node != NULL; who_node = uu_avl_next(who_avl, who_node)) { uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; const char *who_name = who_node->who_perm.who_name; int weight = who_type2weight(who_type); boolean_t first = B_TRUE; deleg_perm_node_t *deleg_node; if (prev_weight != weight) { (void) printf("%s", *title_ptr++); prev_weight = weight; } if (who_name == NULL || strnlen(who_name, 1) == 0) (void) printf("\t"); else (void) printf("\t%s ", who_name); for (deleg_node = uu_avl_first(avl); deleg_node != NULL; deleg_node = uu_avl_next(avl, deleg_node)) { if (first) { (void) printf("%s", deleg_node->dpn_perm.dp_name); first = B_FALSE; } else (void) printf(",%s", deleg_node->dpn_perm.dp_name); } (void) printf("\n"); } } static void inline print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend, const char *title) { who_perm_node_t *who_node = NULL; boolean_t prt_title = B_TRUE; uu_avl_walk_t *walk; if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL) nomem(); while ((who_node = uu_avl_walk_next(walk)) != NULL) { const char *who_name = who_node->who_perm.who_name; const char *nice_who_name = who_node->who_perm.who_ug_name; uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; char delim = ' '; deleg_perm_node_t *deleg_node; boolean_t prt_who = B_TRUE; for (deleg_node = uu_avl_first(avl); deleg_node != NULL; deleg_node = uu_avl_next(avl, deleg_node)) { if (local != deleg_node->dpn_perm.dp_local || descend != deleg_node->dpn_perm.dp_descend) continue; if (prt_who) { const char *who = NULL; if (prt_title) { prt_title = B_FALSE; (void) printf("%s", title); } switch (who_type) { case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: who = gettext("user"); if (nice_who_name) who_name = nice_who_name; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: who = gettext("group"); if (nice_who_name) who_name = nice_who_name; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: who = gettext("everyone"); who_name = NULL; default: break; } prt_who = B_FALSE; if (who_name == NULL) (void) printf("\t%s", who); else (void) printf("\t%s %s", who, who_name); } (void) printf("%c%s", delim, deleg_node->dpn_perm.dp_name); delim = ','; } if (!prt_who) (void) printf("\n"); } uu_avl_walk_end(walk); } static void print_fs_perms(fs_perm_set_t *fspset) { fs_perm_node_t *node = NULL; char buf[ZFS_MAXNAMELEN+32]; const char *dsname = buf; for (node = uu_list_first(fspset->fsps_list); node != NULL; node = uu_list_next(fspset->fsps_list, node)) { uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl; uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; int left = 0; (void) snprintf(buf, ZFS_MAXNAMELEN+32, gettext("---- Permissions on %s "), node->fspn_fsperm.fsp_name); (void) printf("%s", dsname); left = 70 - strlen(buf); while (left-- > 0) (void) printf("-"); (void) printf("\n"); print_set_creat_perms(sc_avl); print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE, gettext("Local permissions:\n")); print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE, gettext("Descendent permissions:\n")); print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE, gettext("Local+Descendent permissions:\n")); } } static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL }; struct deleg_perms { boolean_t un; nvlist_t *nvl; }; static int set_deleg_perms(zfs_handle_t *zhp, void *data) { struct deleg_perms *perms = (struct deleg_perms *)data; zfs_type_t zfs_type = zfs_get_type(zhp); if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME) return (0); return (zfs_set_fsacl(zhp, perms->un, perms->nvl)); } static int zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) { zfs_handle_t *zhp; nvlist_t *perm_nvl = NULL; nvlist_t *update_perm_nvl = NULL; int error = 1; int c; struct allow_opts opts = { 0 }; const char *optstr = un ? "ldugecsrh" : "ldugecsh"; /* check opts */ while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'l': opts.local = B_TRUE; break; case 'd': opts.descend = B_TRUE; break; case 'u': opts.user = B_TRUE; break; case 'g': opts.group = B_TRUE; break; case 'e': opts.everyone = B_TRUE; break; case 's': opts.set = B_TRUE; break; case 'c': opts.create = B_TRUE; break; case 'r': opts.recursive = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case 'h': opts.prt_usage = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check arguments */ parse_allow_args(argc, argv, un, &opts); /* try to open the dataset */ if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM)) == NULL) { (void) fprintf(stderr, "Failed to open Dataset *%s*\n", opts.dataset); return (-1); } if (zfs_get_fsacl(zhp, &perm_nvl) != 0) goto cleanup2; fs_perm_set_init(&fs_perm_set); if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) { (void) fprintf(stderr, "Failed to parse fsacl permissionsn"); goto cleanup1; } if (opts.prt_perms) print_fs_perms(&fs_perm_set); else { (void) construct_fsacl_list(un, &opts, &update_perm_nvl); if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0) goto cleanup0; if (un && opts.recursive) { struct deleg_perms data = { un, update_perm_nvl }; if (zfs_iter_filesystems(zhp, set_deleg_perms, &data) != 0) goto cleanup0; } } error = 0; cleanup0: nvlist_free(perm_nvl); if (update_perm_nvl != NULL) nvlist_free(update_perm_nvl); cleanup1: fs_perm_set_fini(&fs_perm_set); cleanup2: zfs_close(zhp); return (error); } /* * zfs allow [-r] [-t] ... * * -r Recursively hold * -t Temporary hold (hidden option) * * Apply a user-hold with the given tag to the list of snapshots. */ static int zfs_do_allow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); } /* * zfs unallow [-r] [-t] ... * * -r Recursively hold * -t Temporary hold (hidden option) * * Apply a user-hold with the given tag to the list of snapshots. */ static int zfs_do_unallow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE)); } static int zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) { int errors = 0; int i; const char *tag; boolean_t recursive = B_FALSE; boolean_t temphold = B_FALSE; const char *opts = holding ? "rt" : "r"; int c; /* check options */ while ((c = getopt(argc, argv, opts)) != -1) { switch (c) { case 'r': recursive = B_TRUE; break; case 't': temphold = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 2) usage(B_FALSE); tag = argv[0]; --argc; ++argv; if (holding && tag[0] == '.') { /* tags starting with '.' are reserved for libzfs */ (void) fprintf(stderr, gettext("tag may not start with '.'\n")); usage(B_FALSE); } for (i = 0; i < argc; ++i) { zfs_handle_t *zhp; char parent[ZFS_MAXNAMELEN]; const char *delim; char *path = argv[i]; delim = strchr(path, '@'); if (delim == NULL) { (void) fprintf(stderr, gettext("'%s' is not a snapshot\n"), path); ++errors; continue; } (void) strncpy(parent, path, delim - path); parent[delim - path] = '\0'; zhp = zfs_open(g_zfs, parent, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) { ++errors; continue; } if (holding) { if (zfs_hold(zhp, delim+1, tag, recursive, temphold, B_FALSE, -1, 0, 0) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) ++errors; } zfs_close(zhp); } return (errors != 0); } /* * zfs hold [-r] [-t] ... * * -r Recursively hold * -t Temporary hold (hidden option) * * Apply a user-hold with the given tag to the list of snapshots. */ static int zfs_do_hold(int argc, char **argv) { return (zfs_do_hold_rele_impl(argc, argv, B_TRUE)); } /* * zfs release [-r] ... * * -r Recursively release * * Release a user-hold with the given tag from the list of snapshots. */ static int zfs_do_release(int argc, char **argv) { return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); } typedef struct holds_cbdata { boolean_t cb_recursive; const char *cb_snapname; nvlist_t **cb_nvlp; size_t cb_max_namelen; size_t cb_max_taglen; } holds_cbdata_t; #define STRFTIME_FMT_STR "%a %b %e %k:%M %Y" #define DATETIME_BUF_LEN (32) /* * */ static void print_holds(boolean_t scripted, int nwidth, int tagwidth, nvlist_t *nvl) { int i; nvpair_t *nvp = NULL; char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" }; const char *col; if (!scripted) { for (i = 0; i < 3; i++) { col = gettext(hdr_cols[i]); if (i < 2) (void) printf("%-*s ", i ? tagwidth : nwidth, col); else (void) printf("%s\n", col); } } while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { char *zname = nvpair_name(nvp); nvlist_t *nvl2; nvpair_t *nvp2 = NULL; (void) nvpair_value_nvlist(nvp, &nvl2); while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) { char tsbuf[DATETIME_BUF_LEN]; char *tagname = nvpair_name(nvp2); uint64_t val = 0; time_t time; struct tm t; char sep = scripted ? '\t' : ' '; int sepnum = scripted ? 1 : 2; (void) nvpair_value_uint64(nvp2, &val); time = (time_t)val; (void) localtime_r(&time, &t); (void) strftime(tsbuf, DATETIME_BUF_LEN, gettext(STRFTIME_FMT_STR), &t); (void) printf("%-*s%*c%-*s%*c%s\n", nwidth, zname, sepnum, sep, tagwidth, tagname, sepnum, sep, tsbuf); } } } /* * Generic callback function to list a dataset or snapshot. */ static int holds_callback(zfs_handle_t *zhp, void *data) { holds_cbdata_t *cbp = data; nvlist_t *top_nvl = *cbp->cb_nvlp; nvlist_t *nvl = NULL; nvpair_t *nvp = NULL; const char *zname = zfs_get_name(zhp); size_t znamelen = strnlen(zname, ZFS_MAXNAMELEN); if (cbp->cb_recursive) { const char *snapname; char *delim = strchr(zname, '@'); if (delim == NULL) return (0); snapname = delim + 1; if (strcmp(cbp->cb_snapname, snapname)) return (0); } if (zfs_get_holds(zhp, &nvl) != 0) return (-1); if (znamelen > cbp->cb_max_namelen) cbp->cb_max_namelen = znamelen; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *tag = nvpair_name(nvp); size_t taglen = strnlen(tag, MAXNAMELEN); if (taglen > cbp->cb_max_taglen) cbp->cb_max_taglen = taglen; } return (nvlist_add_nvlist(top_nvl, zname, nvl)); } /* * zfs holds [-r] ... * * -r Recursively hold */ static int zfs_do_holds(int argc, char **argv) { int errors = 0; int c; int i; boolean_t scripted = B_FALSE; boolean_t recursive = B_FALSE; const char *opts = "rH"; nvlist_t *nvl; int types = ZFS_TYPE_SNAPSHOT; holds_cbdata_t cb = { 0 }; int limit = 0; int ret = 0; int flags = 0; /* check options */ while ((c = getopt(argc, argv, opts)) != -1) { switch (c) { case 'r': recursive = B_TRUE; break; case 'H': scripted = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (recursive) { types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; flags |= ZFS_ITER_RECURSE; } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) usage(B_FALSE); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); for (i = 0; i < argc; ++i) { char *snapshot = argv[i]; const char *delim; const char *snapname; delim = strchr(snapshot, '@'); if (delim == NULL) { (void) fprintf(stderr, gettext("'%s' is not a snapshot\n"), snapshot); ++errors; continue; } snapname = delim + 1; if (recursive) snapshot[delim - snapshot] = '\0'; cb.cb_recursive = recursive; cb.cb_snapname = snapname; cb.cb_nvlp = &nvl; /* * 1. collect holds data, set format options */ ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit, holds_callback, &cb); if (ret != 0) ++errors; } /* * 2. print holds data */ print_holds(scripted, cb.cb_max_namelen, cb.cb_max_taglen, nvl); if (nvlist_empty(nvl)) (void) fprintf(stderr, gettext("no datasets available\n")); nvlist_free(nvl); return (0 != errors); } #define CHECK_SPINNER 30 #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 5 /* seconds */ static int get_one_dataset(zfs_handle_t *zhp, void *data) { static char *spin[] = { "-", "\\", "|", "/" }; static int spinval = 0; static int spincheck = 0; static time_t last_spin_time = (time_t)0; get_all_cb_t *cbp = data; zfs_type_t type = zfs_get_type(zhp); if (cbp->cb_verbose) { if (--spincheck < 0) { time_t now = time(NULL); if (last_spin_time + SPINNER_TIME < now) { update_progress(spin[spinval++ % 4]); last_spin_time = now; } spincheck = CHECK_SPINNER; } } /* * Interate over any nested datasets. */ if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) { zfs_close(zhp); return (1); } /* * Skip any datasets whose type does not match. */ if ((type & ZFS_TYPE_FILESYSTEM) == 0) { zfs_close(zhp); return (0); } libzfs_add_handle(cbp, zhp); assert(cbp->cb_used <= cbp->cb_alloc); return (0); } static void get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose) { get_all_cb_t cb = { 0 }; cb.cb_verbose = verbose; cb.cb_getone = get_one_dataset; if (verbose) set_progress_header(gettext("Reading ZFS config")); (void) zfs_iter_root(g_zfs, get_one_dataset, &cb); *dslist = cb.cb_handles; *count = cb.cb_used; if (verbose) finish_progress(gettext("done.")); } /* * Generic callback for sharing or mounting filesystems. Because the code is so * similar, we have a common function with an extra parameter to determine which * mode we are using. */ #define OP_SHARE 0x1 #define OP_MOUNT 0x2 /* * Share or mount a dataset. */ static int share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, boolean_t explicit, const char *options) { char mountpoint[ZFS_MAXPROPLEN]; char shareopts[ZFS_MAXPROPLEN]; char smbshareopts[ZFS_MAXPROPLEN]; const char *cmdname = op == OP_SHARE ? "share" : "mount"; struct mnttab mnt; uint64_t zoned, canmount; boolean_t shared_nfs, shared_smb; assert(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM); /* * Check to make sure we can mount/share this dataset. If we * are in the global zone and the filesystem is exported to a * local zone, or if we are in a local zone and the * filesystem is not exported, then it is an error. */ zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); if (zoned && getzoneid() == GLOBAL_ZONEID) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "dataset is exported to a local zone\n"), cmdname, zfs_get_name(zhp)); return (1); } else if (!zoned && getzoneid() != GLOBAL_ZONEID) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "permission denied\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * Ignore any filesystems which don't apply to us. This * includes those with a legacy mountpoint, or those with * legacy share options. */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts, sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0); if (op == OP_SHARE && strcmp(shareopts, "off") == 0 && strcmp(smbshareopts, "off") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot share '%s': " "legacy share\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use share(1M) to " "share this filesystem, or set " "sharenfs property on\n")); return (1); } /* * We cannot share or mount legacy filesystems. If the * shareopts is non-legacy but the mountpoint is legacy, we * treat it as a legacy share. */ if (strcmp(mountpoint, "legacy") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use %s(1M) to " "%s this filesystem\n"), cmdname, cmdname); return (1); } if (strcmp(mountpoint, "none") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': no " "mountpoint set\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * canmount explicit outcome * on no pass through * on yes pass through * off no return 0 * off yes display error, return 1 * noauto no return 0 * noauto yes pass through */ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); if (canmount == ZFS_CANMOUNT_OFF) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "'canmount' property is set to 'off'\n"), cmdname, zfs_get_name(zhp)); return (1); } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) { return (0); } /* * At this point, we have verified that the mountpoint and/or * shareopts are appropriate for auto management. If the * filesystem is already mounted or shared, return (failing * for explicit requests); otherwise mount or share the * filesystem. */ switch (op) { case OP_SHARE: shared_nfs = zfs_is_shared_nfs(zhp, NULL); shared_smb = zfs_is_shared_smb(zhp, NULL); if ((shared_nfs && shared_smb) || ((shared_nfs && strcmp(shareopts, "on") == 0) && (strcmp(smbshareopts, "off") == 0)) || ((shared_smb && strcmp(smbshareopts, "on") == 0) && (strcmp(shareopts, "off") == 0))) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot share " "'%s': filesystem already shared\n"), zfs_get_name(zhp)); return (1); } if (!zfs_is_mounted(zhp, NULL) && zfs_mount(zhp, NULL, 0) != 0) return (1); if (protocol == NULL) { if (zfs_shareall(zhp) != 0) return (1); } else if (strcmp(protocol, "nfs") == 0) { if (zfs_share_nfs(zhp)) return (1); } else if (strcmp(protocol, "smb") == 0) { if (zfs_share_smb(zhp)) return (1); } else { (void) fprintf(stderr, gettext("cannot share " "'%s': invalid share type '%s' " "specified\n"), zfs_get_name(zhp), protocol); return (1); } break; case OP_MOUNT: if (options == NULL) mnt.mnt_mntopts = ""; else mnt.mnt_mntopts = (char *)options; if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && zfs_is_mounted(zhp, NULL)) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot mount " "'%s': filesystem already mounted\n"), zfs_get_name(zhp)); return (1); } if (zfs_mount(zhp, options, flags) != 0) return (1); break; } return (0); } /* * Reports progress in the form "(current/total)". Not thread-safe. */ static void report_mount_progress(int current, int total) { static time_t last_progress_time = 0; time_t now = time(NULL); char info[32]; /* report 1..n instead of 0..n-1 */ ++current; /* display header if we're here for the first time */ if (current == 1) { set_progress_header(gettext("Mounting ZFS filesystems")); } else if (current != total && last_progress_time + MOUNT_TIME >= now) { /* too soon to report again */ return; } last_progress_time = now; (void) sprintf(info, "(%d/%d)", current, total); if (current == total) finish_progress(info); else update_progress(info); } static void append_options(char *mntopts, char *newopts) { int len = strlen(mntopts); /* original length plus new string to append plus 1 for the comma */ if (len + 1 + strlen(newopts) >= MNT_LINE_MAX) { (void) fprintf(stderr, gettext("the opts argument for " "'%s' option is too long (more than %d chars)\n"), "-o", MNT_LINE_MAX); usage(B_FALSE); } if (*mntopts) mntopts[len++] = ','; (void) strcpy(&mntopts[len], newopts); } static int share_mount(int op, int argc, char **argv) { int do_all = 0; boolean_t verbose = B_FALSE; int c, ret = 0; char *options = NULL; int flags = 0; /* check options */ while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a")) != -1) { switch (c) { case 'a': do_all = 1; break; case 'v': verbose = B_TRUE; break; case 'o': if (*optarg == '\0') { (void) fprintf(stderr, gettext("empty mount " "options (-o) specified\n")); usage(B_FALSE); } if (options == NULL) options = safe_malloc(MNT_LINE_MAX + 1); /* option validation is done later */ append_options(options, optarg); break; case 'O': flags |= MS_OVERLAY; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (do_all) { zfs_handle_t **dslist = NULL; size_t i, count = 0; char *protocol = NULL; if (op == OP_SHARE && argc > 0) { if (strcmp(argv[0], "nfs") != 0 && strcmp(argv[0], "smb") != 0) { (void) fprintf(stderr, gettext("share type " "must be 'nfs' or 'smb'\n")); usage(B_FALSE); } protocol = argv[0]; argc--; argv++; } if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } start_progress_timer(); get_all_datasets(&dslist, &count, verbose); if (count == 0) return (0); qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp); for (i = 0; i < count; i++) { if (verbose) report_mount_progress(i, count); if (share_mount_one(dslist[i], op, flags, protocol, B_FALSE, options) != 0) ret = 1; zfs_close(dslist[i]); } free(dslist); } else if (argc == 0) { struct mnttab entry; if ((op == OP_SHARE) || (options != NULL)) { (void) fprintf(stderr, gettext("missing filesystem " "argument (specify -a for all)\n")); usage(B_FALSE); } /* * When mount is given no arguments, go through /etc/mtab and * display any active ZFS mounts. We hide any snapshots, since * they are controlled automatically. */ rewind(mnttab_file); while (getmntent(mnttab_file, &entry) == 0) { if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 || strchr(entry.mnt_special, '@') != NULL) continue; (void) printf("%-30s %s\n", entry.mnt_special, entry.mnt_mountp); } } else { zfs_handle_t *zhp; if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; } else { ret = share_mount_one(zhp, op, flags, NULL, B_TRUE, options); zfs_close(zhp); } } return (ret); } /* * zfs mount -a [nfs] * zfs mount filesystem * * Mount all filesystems, or mount the given filesystem. */ static int zfs_do_mount(int argc, char **argv) { return (share_mount(OP_MOUNT, argc, argv)); } /* * zfs share -a [nfs | smb] * zfs share filesystem * * Share all filesystems, or share the given filesystem. */ static int zfs_do_share(int argc, char **argv) { return (share_mount(OP_SHARE, argc, argv)); } typedef struct unshare_unmount_node { zfs_handle_t *un_zhp; char *un_mountp; uu_avl_node_t un_avlnode; } unshare_unmount_node_t; /* ARGSUSED */ static int unshare_unmount_compare(const void *larg, const void *rarg, void *unused) { const unshare_unmount_node_t *l = larg; const unshare_unmount_node_t *r = rarg; return (strcmp(l->un_mountp, r->un_mountp)); } /* * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an * absolute path, find the entry /etc/mtab, verify that its a ZFS filesystem, * and unmount it appropriately. */ static int unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) { zfs_handle_t *zhp; int ret = 0; struct stat64 statbuf; struct extmnttab entry; const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount"; ino_t path_inode; /* * Search for the path in /etc/mtab. Rather than looking for the * specific path, which can be fooled by non-standard paths (i.e. ".." * or "//"), we stat() the path and search for the corresponding * (major,minor) device pair. */ if (stat64(path, &statbuf) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), cmdname, path, strerror(errno)); return (1); } path_inode = statbuf.st_ino; /* * Search for the given (major,minor) pair in the mount table. */ rewind(mnttab_file); while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) { if (entry.mnt_major == major(statbuf.st_dev) && entry.mnt_minor == minor(statbuf.st_dev)) break; } if (ret != 0) { if (op == OP_SHARE) { (void) fprintf(stderr, gettext("cannot %s '%s': not " "currently mounted\n"), cmdname, path); return (1); } (void) fprintf(stderr, gettext("warning: %s not in mtab\n"), path); if ((ret = umount2(path, flags)) != 0) (void) fprintf(stderr, gettext("%s: %s\n"), path, strerror(errno)); return (ret != 0); } if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS " "filesystem\n"), cmdname, path); return (1); } if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); ret = 1; if (stat64(entry.mnt_mountp, &statbuf) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), cmdname, path, strerror(errno)); goto out; } else if (statbuf.st_ino != path_inode) { (void) fprintf(stderr, gettext("cannot " "%s '%s': not a mountpoint\n"), cmdname, path); goto out; } if (op == OP_SHARE) { char nfs_mnt_prop[ZFS_MAXPROPLEN]; char smbshare_prop[ZFS_MAXPROPLEN]; verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshare_prop, sizeof (smbshare_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0 && strcmp(smbshare_prop, "off") == 0) { (void) fprintf(stderr, gettext("cannot unshare " "'%s': legacy share\n"), path); (void) fprintf(stderr, gettext("use exportfs(8) " "or smbcontrol(1) to unshare this filesystem\n")); } else if (!zfs_is_shared(zhp)) { (void) fprintf(stderr, gettext("cannot unshare '%s': " "not currently shared\n"), path); } else { ret = zfs_unshareall_bypath(zhp, path); } } else { char mtpt_prop[ZFS_MAXPROPLEN]; verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mtpt_prop, sizeof (mtpt_prop), NULL, NULL, 0, B_FALSE) == 0); if (is_manual) { ret = zfs_unmount(zhp, NULL, flags); } else if (strcmp(mtpt_prop, "legacy") == 0) { (void) fprintf(stderr, gettext("cannot unmount " "'%s': legacy mountpoint\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use umount(8) " "to unmount this filesystem\n")); } else { ret = zfs_unmountall(zhp, flags); } } out: zfs_close(zhp); return (ret != 0); } /* * Generic callback for unsharing or unmounting a filesystem. */ static int unshare_unmount(int op, int argc, char **argv) { int do_all = 0; int flags = 0; int ret = 0; int c; zfs_handle_t *zhp; char nfs_mnt_prop[ZFS_MAXPROPLEN]; char sharesmb[ZFS_MAXPROPLEN]; /* check options */ while ((c = getopt(argc, argv, op == OP_SHARE ? "a" : "af")) != -1) { switch (c) { case 'a': do_all = 1; break; case 'f': flags = MS_FORCE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (do_all) { /* * We could make use of zfs_for_each() to walk all datasets in * the system, but this would be very inefficient, especially * since we would have to linearly search /etc/mtab for each * one. Instead, do one pass through /etc/mtab looking for * zfs entries and call zfs_unmount() for each one. * * Things get a little tricky if the administrator has created * mountpoints beneath other ZFS filesystems. In this case, we * have to unmount the deepest filesystems first. To accomplish * this, we place all the mountpoints in an AVL tree sorted by * the special type (dataset name), and walk the result in * reverse to make sure to get any snapshots first. */ struct mnttab entry; uu_avl_pool_t *pool; uu_avl_t *tree = NULL; unshare_unmount_node_t *node; uu_avl_index_t idx; uu_avl_walk_t *walk; if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (((pool = uu_avl_pool_create("unmount_pool", sizeof (unshare_unmount_node_t), offsetof(unshare_unmount_node_t, un_avlnode), unshare_unmount_compare, UU_DEFAULT)) == NULL) || ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL)) nomem(); rewind(mnttab_file); while (getmntent(mnttab_file, &entry) == 0) { /* ignore non-ZFS entries */ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; /* ignore snapshots */ if (strchr(entry.mnt_special, '@') != NULL) continue; if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; continue; } switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") != 0) break; verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0) continue; break; case OP_MOUNT: /* Ignore legacy mounts */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "legacy") == 0) continue; /* Ignore canmount=noauto mounts */ if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) continue; default: break; } node = safe_malloc(sizeof (unshare_unmount_node_t)); node->un_zhp = zhp; node->un_mountp = safe_strdup(entry.mnt_mountp); uu_avl_node_init(node, &node->un_avlnode, pool); if (uu_avl_find(tree, node, NULL, &idx) == NULL) { uu_avl_insert(tree, node, idx); } else { zfs_close(node->un_zhp); free(node->un_mountp); free(node); } } /* * Walk the AVL tree in reverse, unmounting each filesystem and * removing it from the AVL tree in the process. */ if ((walk = uu_avl_walk_start(tree, UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(tree, node); switch (op) { case OP_SHARE: if (zfs_unshareall_bypath(node->un_zhp, node->un_mountp) != 0) ret = 1; break; case OP_MOUNT: if (zfs_unmount(node->un_zhp, node->un_mountp, flags) != 0) ret = 1; break; } zfs_close(node->un_zhp); free(node->un_mountp); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(tree); uu_avl_pool_destroy(pool); } else { if (argc != 1) { if (argc == 0) (void) fprintf(stderr, gettext("missing filesystem argument\n")); else (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* * We have an argument, but it may be a full path or a ZFS * filesystem. Pass full paths off to unmount_path() (shared by * manual_unmount), otherwise open the filesystem and pass to * zfs_unmount(). */ if (argv[0][0] == '/') return (unshare_unmount_path(op, argv[0], flags, B_FALSE)); if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) return (1); verify(zfs_prop_get(zhp, op == OP_SHARE ? ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, sharesmb, sizeof (sharesmb), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0 && strcmp(sharesmb, "off") == 0) { (void) fprintf(stderr, gettext("cannot " "unshare '%s': legacy share\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use " "unshare(1M) to unshare this " "filesystem\n")); ret = 1; } else if (!zfs_is_shared(zhp)) { (void) fprintf(stderr, gettext("cannot " "unshare '%s': not currently " "shared\n"), zfs_get_name(zhp)); ret = 1; } else if (zfs_unshareall(zhp) != 0) { ret = 1; } break; case OP_MOUNT: if (strcmp(nfs_mnt_prop, "legacy") == 0) { (void) fprintf(stderr, gettext("cannot " "unmount '%s': legacy " "mountpoint\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use " "umount(1M) to unmount this " "filesystem\n")); ret = 1; } else if (!zfs_is_mounted(zhp, NULL)) { (void) fprintf(stderr, gettext("cannot " "unmount '%s': not currently " "mounted\n"), zfs_get_name(zhp)); ret = 1; } else if (zfs_unmountall(zhp, flags) != 0) { ret = 1; } break; } zfs_close(zhp); } return (ret); } /* * zfs unmount -a * zfs unmount filesystem * * Unmount all filesystems, or a specific ZFS filesystem. */ static int zfs_do_unmount(int argc, char **argv) { return (unshare_unmount(OP_MOUNT, argc, argv)); } /* * zfs unshare -a * zfs unshare filesystem * * Unshare all filesystems, or a specific ZFS filesystem. */ static int zfs_do_unshare(int argc, char **argv) { return (unshare_unmount(OP_SHARE, argc, argv)); } static int find_command_idx(char *command, int *idx) { int i; for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) continue; if (strcmp(command, command_table[i].name) == 0) { *idx = i; return (0); } } return (1); } static int zfs_do_diff(int argc, char **argv) { zfs_handle_t *zhp; int flags = 0; char *tosnap = NULL; char *fromsnap = NULL; char *atp, *copy; int err = 0; int c; while ((c = getopt(argc, argv, "FHt")) != -1) { switch (c) { case 'F': flags |= ZFS_DIFF_CLASSIFY; break; case 'H': flags |= ZFS_DIFF_PARSEABLE; break; case 't': flags |= ZFS_DIFF_TIMESTAMP; break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("must provide at least one snapshot name\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } fromsnap = argv[0]; tosnap = (argc == 2) ? argv[1] : NULL; copy = NULL; if (*fromsnap != '@') copy = strdup(fromsnap); else if (tosnap) copy = strdup(tosnap); if (copy == NULL) usage(B_FALSE); if ((atp = strchr(copy, '@'))) *atp = '\0'; if ((zhp = zfs_open(g_zfs, copy, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); free(copy); /* * Ignore SIGPIPE so that the library can give us * information on any failure */ (void) sigignore(SIGPIPE); err = zfs_show_diffs(zhp, STDOUT_FILENO, fromsnap, tosnap, flags); zfs_close(zhp); return (err != 0); } int main(int argc, char **argv) { int ret = 0; int i = 0; char *cmdname; (void) setlocale(LC_ALL, ""); (void) textdomain(TEXT_DOMAIN); opterr = 0; if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) { (void) fprintf(stderr, gettext("internal error: unable to " "open %s\n"), MNTTAB); return (1); } /* * Make sure the user has specified some command. */ if (argc < 2) { (void) fprintf(stderr, gettext("missing command\n")); usage(B_FALSE); } cmdname = argv[1]; /* * The 'umount' command is an alias for 'unmount' */ if (strcmp(cmdname, "umount") == 0) cmdname = "unmount"; /* * The 'recv' command is an alias for 'receive' */ if (strcmp(cmdname, "recv") == 0) cmdname = "receive"; /* * The 'snap' command is an alias for 'snapshot' */ if (strcmp(cmdname, "snap") == 0) cmdname = "snapshot"; /* * Special case '-?' */ if ((strcmp(cmdname, "-?") == 0) || (strcmp(cmdname, "--help") == 0)) usage(B_TRUE); if ((g_zfs = libzfs_init()) == NULL) return (1); zpool_set_history_str("zfs", argc, argv, history_str); verify(zpool_stage_history(g_zfs, history_str) == 0); libzfs_print_on_error(g_zfs, B_TRUE); /* * Run the appropriate command. */ libzfs_mnttab_cache(g_zfs, B_FALSE); if (find_command_idx(cmdname, &i) == 0) { current_command = &command_table[i]; ret = command_table[i].func(argc - 1, argv + 1); } else if (strchr(cmdname, '=') != NULL) { verify(find_command_idx("set", &i) == 0); current_command = &command_table[i]; ret = command_table[i].func(argc, argv); } else { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); usage(B_FALSE); ret = 1; } libzfs_fini(g_zfs); (void) fclose(mnttab_file); /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } return (ret); } diff --git a/include/libzfs.h b/include/libzfs.h index 096910bb1973..83e1bee6e756 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -1,735 +1,750 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _LIBZFS_H #define _LIBZFS_H #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * Miscellaneous ZFS constants */ #define ZFS_MAXNAMELEN MAXNAMELEN #define ZPOOL_MAXNAMELEN MAXNAMELEN #define ZFS_MAXPROPLEN MAXPATHLEN #define ZPOOL_MAXPROPLEN MAXPATHLEN /* * Default device paths */ #if defined(__sun__) || defined(__sun) #define DISK_ROOT "/dev/dsk" #define RDISK_ROOT "/dev/rdsk" #define UDISK_ROOT RDISK_ROOT #define FIRST_SLICE "s0" #define BACKUP_SLICE "s2" #endif #ifdef __linux__ #define DISK_ROOT "/dev" #define RDISK_ROOT DISK_ROOT #define UDISK_ROOT "/dev/disk" #define FIRST_SLICE "1" #define BACKUP_SLICE "" #endif /* * libzfs errors */ enum { EZFS_NOMEM = 2000, /* out of memory */ EZFS_BADPROP, /* invalid property value */ EZFS_PROPREADONLY, /* cannot set readonly property */ EZFS_PROPTYPE, /* property does not apply to dataset type */ EZFS_PROPNONINHERIT, /* property is not inheritable */ EZFS_PROPSPACE, /* bad quota or reservation */ EZFS_BADTYPE, /* dataset is not of appropriate type */ EZFS_BUSY, /* pool or dataset is busy */ EZFS_EXISTS, /* pool or dataset already exists */ EZFS_NOENT, /* no such pool or dataset */ EZFS_BADSTREAM, /* bad backup stream */ EZFS_DSREADONLY, /* dataset is readonly */ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ EZFS_INVALIDNAME, /* invalid dataset name */ EZFS_BADRESTORE, /* unable to restore to destination */ EZFS_BADBACKUP, /* backup failed */ EZFS_BADTARGET, /* bad attach/detach/replace target */ EZFS_NODEVICE, /* no such device in pool */ EZFS_BADDEV, /* invalid device to add */ EZFS_NOREPLICAS, /* no valid replicas */ EZFS_RESILVERING, /* currently resilvering */ EZFS_BADVERSION, /* unsupported version */ EZFS_POOLUNAVAIL, /* pool is currently unavailable */ EZFS_DEVOVERFLOW, /* too many devices in one vdev */ EZFS_BADPATH, /* must be an absolute path */ EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ EZFS_ZONED, /* used improperly in local zone */ EZFS_MOUNTFAILED, /* failed to mount dataset */ EZFS_UMOUNTFAILED, /* failed to unmount dataset */ EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */ EZFS_SHARENFSFAILED, /* share(1M) failed */ EZFS_PERM, /* permission denied */ EZFS_NOSPC, /* out of space */ EZFS_FAULT, /* bad address */ EZFS_IO, /* I/O error */ EZFS_INTR, /* signal received */ EZFS_ISSPARE, /* device is a hot spare */ EZFS_INVALCONFIG, /* invalid vdev configuration */ EZFS_RECURSIVE, /* recursive dependency */ EZFS_NOHISTORY, /* no history object */ EZFS_POOLPROPS, /* couldn't retrieve pool props */ EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ EZFS_NAMETOOLONG, /* dataset name is too long */ EZFS_OPENFAILED, /* open of device failed */ EZFS_NOCAP, /* couldn't get capacity */ EZFS_LABELFAILED, /* write of label failed */ EZFS_BADWHO, /* invalid permission who */ EZFS_BADPERM, /* invalid permission */ EZFS_BADPERMSET, /* invalid permission set name */ EZFS_NODELEGATION, /* delegated administration is disabled */ EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ EZFS_SHARESMBFAILED, /* failed to share over smb */ EZFS_BADCACHE, /* bad cache file */ EZFS_ISL2CACHE, /* device is for the level 2 ARC */ EZFS_VDEVNOTSUP, /* unsupported vdev type */ EZFS_NOTSUP, /* ops not supported on this dataset */ EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ EZFS_REFTAG_RELE, /* snapshot release: tag not found */ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ EZFS_PIPEFAILED, /* pipe create failed */ EZFS_THREADCREATEFAILED, /* thread create failed */ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ EZFS_SCRUBBING, /* currently scrubbing */ EZFS_NO_SCRUB, /* no active scrub */ EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_UNKNOWN }; /* * The following data structures are all part * of the zfs_allow_t data structure which is * used for printing 'allow' permissions. * It is a linked list of zfs_allow_t's which * then contain avl tree's for user/group/sets/... * and each one of the entries in those trees have * avl tree's for the permissions they belong to and * whether they are local,descendent or local+descendent * permissions. The AVL trees are used primarily for * sorting purposes, but also so that we can quickly find * a given user and or permission. */ typedef struct zfs_perm_node { avl_node_t z_node; char z_pname[MAXPATHLEN]; } zfs_perm_node_t; typedef struct zfs_allow_node { avl_node_t z_node; char z_key[MAXPATHLEN]; /* name, such as joe */ avl_tree_t z_localdescend; /* local+descendent perms */ avl_tree_t z_local; /* local permissions */ avl_tree_t z_descend; /* descendent permissions */ } zfs_allow_node_t; typedef struct zfs_allow { struct zfs_allow *z_next; char z_setpoint[MAXPATHLEN]; avl_tree_t z_sets; avl_tree_t z_crperms; avl_tree_t z_user; avl_tree_t z_group; avl_tree_t z_everyone; } zfs_allow_t; /* * Basic handle types */ typedef struct zfs_handle zfs_handle_t; typedef struct zpool_handle zpool_handle_t; typedef struct libzfs_handle libzfs_handle_t; /* * Library initialization */ extern libzfs_handle_t *libzfs_init(void); extern void libzfs_fini(libzfs_handle_t *); extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); extern int libzfs_errno(libzfs_handle_t *); extern const char *libzfs_error_action(libzfs_handle_t *); extern const char *libzfs_error_description(libzfs_handle_t *); extern void libzfs_mnttab_init(libzfs_handle_t *); extern void libzfs_mnttab_fini(libzfs_handle_t *); extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, struct mnttab *); extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, const char *, const char *); extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); /* * Basic handle functions */ extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); extern void zpool_close(zpool_handle_t *); extern const char *zpool_get_name(zpool_handle_t *); extern int zpool_get_state(zpool_handle_t *); extern char *zpool_state_to_name(vdev_state_t, vdev_aux_t); extern void zpool_free_handles(libzfs_handle_t *); /* * Iterate over all active pools in the system. */ typedef int (*zpool_iter_f)(zpool_handle_t *, void *); extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); /* * Functions to create and destroy pools */ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, nvlist_t *, nvlist_t *); extern int zpool_destroy(zpool_handle_t *); extern int zpool_add(zpool_handle_t *, nvlist_t *); typedef struct splitflags { /* do not split, but return the config that would be split off */ int dryrun : 1; /* after splitting, import the pool */ int import : 1; } splitflags_t; /* * Functions to manipulate pool and vdev state */ extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_reguid(zpool_handle_t *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *, nvlist_t *, int); extern int zpool_vdev_detach(zpool_handle_t *, const char *); extern int zpool_vdev_remove(zpool_handle_t *, const char *); extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, splitflags_t); extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern int zpool_label_disk_wait(char *, int); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *); /* * Functions to manage pool properties */ extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, size_t proplen, zprop_source_t *); extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, zprop_source_t *); extern const char *zpool_prop_to_name(zpool_prop_t); extern const char *zpool_prop_values(zpool_prop_t); /* * Pool health statistics. */ typedef enum { /* * The following correspond to faults as defined in the (fault.fs.zfs.*) * event namespace. Each is associated with a corresponding message ID. */ ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ /* * These faults have no corresponding message ID. At the time we are * checking the status, the original reason for the FMA fault (I/O or * checksum errors) has been lost. */ ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ /* * The following are not faults per se, but still an error possibly * requiring administrative attention. There is no corresponding * message ID. */ ZPOOL_STATUS_VERSION_OLDER, /* older on-disk version */ ZPOOL_STATUS_RESILVERING, /* device being resilvered */ ZPOOL_STATUS_OFFLINE_DEV, /* device online */ ZPOOL_STATUS_REMOVED_DEV, /* removed device */ /* * Finally, the following indicates a healthy pool. */ ZPOOL_STATUS_OK } zpool_status_t; extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); extern zpool_status_t zpool_import_status(nvlist_t *, char **); extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); /* * Statistics and configuration functions. */ extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); /* * Import and export functions */ extern int zpool_export(zpool_handle_t *, boolean_t); extern int zpool_export_force(zpool_handle_t *); extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char *altroot); extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, nvlist_t *, int); /* * Search for pools to import */ typedef struct importargs { char **path; /* a list of paths to search */ int paths; /* number of paths to search */ char *poolname; /* name of a pool to find */ uint64_t guid; /* guid of a pool to find */ char *cachefile; /* cachefile to use for import */ int can_be_active : 1; /* can the pool be active? */ int unique : 1; /* does 'poolname' already exist? */ int exists : 1; /* set on return if pool already exists */ } importargs_t; extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); /* legacy pool search routines */ extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, char *, uint64_t); /* * Miscellaneous pool functions */ struct zfs_cmd; extern const char *zfs_history_event_names[LOG_END]; extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, boolean_t verbose); extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); extern void zpool_set_history_str(const char *subcommand, int argc, char **argv, char *history_str); extern int zpool_stage_history(libzfs_handle_t *, const char *); extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int); extern int zpool_events_clear(libzfs_handle_t *, int *); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, nvlist_t *); /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. */ extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); +extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); extern void zfs_close(zfs_handle_t *); extern zfs_type_t zfs_get_type(const zfs_handle_t *); extern const char *zfs_get_name(const zfs_handle_t *); extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); /* * Property management functions. Some functions are shared with the kernel, * and are found in sys/fs/zfs.h. */ /* * zfs dataset property management */ extern const char *zfs_prop_default_string(zfs_prop_t); extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, uint64_t, zfs_handle_t *, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zprop_source_t *, char *, size_t, boolean_t); extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, boolean_t); extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zprop_source_t *, char *, size_t); extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); +extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue); +extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, + char *propbuf, int proplen, boolean_t literal); +extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, + uint64_t *usedp); extern uint64_t getprop_uint64(zfs_handle_t *, zfs_prop_t, char **); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); extern int zfs_prop_is_string(zfs_prop_t prop); extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); +extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); typedef struct zprop_list { int pl_prop; char *pl_user_prop; struct zprop_list *pl_next; boolean_t pl_all; size_t pl_width; size_t pl_recvd_width; boolean_t pl_fixed; } zprop_list_t; extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t); extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); #define ZFS_MOUNTPOINT_NONE "none" #define ZFS_MOUNTPOINT_LEGACY "legacy" /* * zpool property management */ extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **); extern const char *zpool_prop_default_string(zpool_prop_t); extern uint64_t zpool_prop_default_numeric(zpool_prop_t); extern const char *zpool_prop_column_name(zpool_prop_t); extern boolean_t zpool_prop_align_right(zpool_prop_t); /* * Functions shared by zfs and zpool property management. */ extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, zfs_type_t type); extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, zfs_type_t); extern void zprop_free_list(zprop_list_t *); #define ZFS_GET_NCOLS 5 typedef enum { GET_COL_NONE, GET_COL_NAME, GET_COL_PROPERTY, GET_COL_VALUE, GET_COL_RECVD, GET_COL_SOURCE } zfs_get_column_t; /* * Functions for printing zfs or zpool properties */ typedef struct zprop_get_cbdata { int cb_sources; zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; int cb_colwidths[ZFS_GET_NCOLS + 1]; boolean_t cb_scripted; boolean_t cb_literal; boolean_t cb_first; zprop_list_t *cb_proplist; zfs_type_t cb_type; } zprop_get_cbdata_t; void zprop_print_one_property(const char *, zprop_get_cbdata_t *, const char *, const char *, zprop_source_t, const char *, const char *); /* * Iterator functions. */ typedef int (*zfs_iter_f)(zfs_handle_t *, void *); extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *); +extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); typedef struct get_all_cb { zfs_handle_t **cb_handles; size_t cb_alloc; size_t cb_used; boolean_t cb_verbose; int (*cb_getone)(zfs_handle_t *, void *); } get_all_cb_t; void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); int libzfs_dataset_cmp(const void *, const void *); /* * Functions to create and destroy datasets. */ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, nvlist_t *); extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); +extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); extern int zfs_rename(zfs_handle_t *, const char *, boolean_t); typedef struct sendflags { /* print informational messages (ie, -v was specified) */ - int verbose : 1; + boolean_t verbose; /* recursive send (ie, -R) */ - int replicate : 1; + boolean_t replicate; /* for incrementals, do all intermediate snapshots */ - int doall : 1; /* (ie, -I) */ + boolean_t doall; /* if dataset is a clone, do incremental from its origin */ - int fromorigin : 1; + boolean_t fromorigin; /* do deduplication */ - int dedup : 1; + boolean_t dedup; /* send properties (ie, -p) */ - int props : 1; + boolean_t props; + + /* do not send (no-op, ie. -n) */ + boolean_t dryrun; + + /* parsable verbose output (ie. -P) */ + boolean_t parsable; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); -extern int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, - sendflags_t flags, int outfd, snapfilter_cb_t filter_func, - void *cb_arg, nvlist_t **debugnvp); +extern int zfs_send(zfs_handle_t *, const char *, const char *, + sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, boolean_t, boolean_t, int, uint64_t, uint64_t); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, zfs_userspace_cb_t, void *); extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); typedef struct recvflags { /* print informational messages (ie, -v was specified) */ - int verbose : 1; + boolean_t verbose; /* the destination is a prefix, not the exact fs (ie, -d) */ - int isprefix : 1; + boolean_t isprefix; /* * Only the tail of the sent snapshot path is appended to the * destination to determine the received snapshot name (ie, -e). */ - int istail : 1; + boolean_t istail; /* do not actually do the recv, just check if it would work (ie, -n) */ - int dryrun : 1; + boolean_t dryrun; /* rollback/destroy filesystems as necessary (eg, -F) */ - int force : 1; + boolean_t force; /* set "canmount=off" on all modified filesystems */ - int canmountoff : 1; + boolean_t canmountoff; /* byteswap flag is used internally; callers need not specify */ - int byteswap : 1; + boolean_t byteswap; /* do not mount file systems as they are extracted (private) */ - int nomount : 1; + boolean_t nomount; } recvflags_t; -extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t, +extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { ZFS_DIFF_PARSEABLE = 0x1, ZFS_DIFF_TIMESTAMP = 0x2, ZFS_DIFF_CLASSIFY = 0x4 } diff_flags_t; extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, int); /* * Miscellaneous functions. */ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); extern void zfs_append_partition(const char *path, char *buf, size_t buflen); extern int zfs_resolve_shortname(const char *name, char *path, size_t pathlen); /* * Mount support functions. */ extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); extern int zfs_mount(zfs_handle_t *, const char *, int); extern int zfs_unmount(zfs_handle_t *, const char *, int); extern int zfs_unmountall(zfs_handle_t *, int); /* * Share support functions. */ extern boolean_t zfs_is_shared(zfs_handle_t *); extern int zfs_share(zfs_handle_t *); extern int zfs_unshare(zfs_handle_t *); /* * Protocol-specific share support functions. */ extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); extern int zfs_share_nfs(zfs_handle_t *); extern int zfs_share_smb(zfs_handle_t *); extern int zfs_shareall(zfs_handle_t *); extern int zfs_unshare_nfs(zfs_handle_t *, const char *); extern int zfs_unshare_smb(zfs_handle_t *, const char *); extern int zfs_unshareall_nfs(zfs_handle_t *); extern int zfs_unshareall_smb(zfs_handle_t *); extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); extern int zfs_unshareall(zfs_handle_t *); extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, void *, void *, int, zfs_share_op_t); /* * Utility function to convert a number to a human-readable form. */ extern void zfs_nicenum(uint64_t, char *, size_t); extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); /* * Utility functions to run an external process. */ #define STDOUT_VERBOSE 0x01 #define STDERR_VERBOSE 0x02 int libzfs_run_process(const char *, char **, int flags); int libzfs_load_module(const char *); /* * Given a device or file, determine if it is part of a pool. */ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, boolean_t *); /* * Label manipulation. */ extern int zpool_read_label(int, nvlist_t **); extern int zpool_clear_label(int); /* * Management interfaces for SMB ACL files */ int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); /* * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); /* * Mappings between vdev and FRU. */ extern void libzfs_fru_refresh(libzfs_handle_t *); extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *); extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *); extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *, const char *); extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *); extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *); #ifdef __cplusplus } #endif #endif /* _LIBZFS_H */ diff --git a/include/libzfs_impl.h b/include/libzfs_impl.h index 2389b7823aaa..fabcb1183144 100644 --- a/include/libzfs_impl.h +++ b/include/libzfs_impl.h @@ -1,220 +1,222 @@ /* * CDDL HEADER SART * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _LIBFS_IMPL_H #define _LIBFS_IMPL_H #include #include #include #include #include #include #include #include #if defined(HAVE_LIBTOPO) #include #endif /* HAVE_LIBTOPO */ #ifdef __cplusplus extern "C" { #endif #ifdef VERIFY #undef VERIFY #endif #define VERIFY verify typedef struct libzfs_fru { char *zf_device; char *zf_fru; struct libzfs_fru *zf_chain; struct libzfs_fru *zf_next; } libzfs_fru_t; struct libzfs_handle { int libzfs_error; int libzfs_fd; FILE *libzfs_mnttab; FILE *libzfs_sharetab; zpool_handle_t *libzfs_pool_handles; uu_avl_pool_t *libzfs_ns_avlpool; uu_avl_t *libzfs_ns_avl; uint64_t libzfs_ns_gen; int libzfs_desc_active; char libzfs_action[1024]; char libzfs_desc[1024]; char *libzfs_log_str; int libzfs_printerr; int libzfs_storeerr; /* stuff error messages into buffer */ void *libzfs_sharehdl; /* libshare handle */ uint_t libzfs_shareflags; boolean_t libzfs_mnttab_enable; avl_tree_t libzfs_mnttab_cache; int libzfs_pool_iter; #if defined(HAVE_LIBTOPO) topo_hdl_t *libzfs_topo_hdl; libzfs_fru_t **libzfs_fru_hash; libzfs_fru_t *libzfs_fru_list; #endif /* HAVE_LIBTOPO */ char libzfs_chassis_id[256]; }; #define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */ struct zfs_handle { libzfs_handle_t *zfs_hdl; zpool_handle_t *zpool_hdl; char zfs_name[ZFS_MAXNAMELEN]; zfs_type_t zfs_type; /* type including snapshot */ zfs_type_t zfs_head_type; /* type excluding snapshot */ dmu_objset_stats_t zfs_dmustats; nvlist_t *zfs_props; nvlist_t *zfs_user_props; nvlist_t *zfs_recvd_props; boolean_t zfs_mntcheck; char *zfs_mntopts; uint8_t *zfs_props_table; }; /* * This is different from checking zfs_type, because it will also catch * snapshots of volumes. */ #define ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME) struct zpool_handle { libzfs_handle_t *zpool_hdl; zpool_handle_t *zpool_next; char zpool_name[ZPOOL_MAXNAMELEN]; int zpool_state; size_t zpool_config_size; nvlist_t *zpool_config; nvlist_t *zpool_old_config; nvlist_t *zpool_props; diskaddr_t zpool_start_block; }; -typedef enum { +typedef enum { PROTO_NFS = 0, PROTO_SMB = 1, PROTO_END = 2 } zfs_share_proto_t; /* * The following can be used as a bitmask and any new values * added must preserve that capability. */ typedef enum { SHARED_NOT_SHARED = 0x0, SHARED_NFS = 0x2, SHARED_SMB = 0x4 } zfs_share_type_t; int zfs_error(libzfs_handle_t *, int, const char *); int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...); void zfs_error_aux(libzfs_handle_t *, const char *, ...); void *zfs_alloc(libzfs_handle_t *, size_t); void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t); char *zfs_asprintf(libzfs_handle_t *, const char *, ...); char *zfs_strdup(libzfs_handle_t *, const char *); int no_memory(libzfs_handle_t *); int zfs_standard_error(libzfs_handle_t *, int, const char *); int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); int zpool_standard_error(libzfs_handle_t *, int, const char *); int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***, size_t *); - +zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *); +zfs_handle_t *make_dataset_simple_handle_zc(zfs_handle_t *, zfs_cmd_t *); int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t, nvlist_t *, char **, uint64_t *, const char *); int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type); /* * Use this changelist_gather() flag to force attempting mounts * on each change node regardless of whether or not it is currently * mounted. */ #define CL_GATHER_MOUNT_ALWAYS 1 typedef struct prop_changelist prop_changelist_t; int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t); int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); int zcmd_write_conf_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *); int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **); void zcmd_free_nvlists(zfs_cmd_t *); int changelist_prefix(prop_changelist_t *); int changelist_postfix(prop_changelist_t *); void changelist_rename(prop_changelist_t *, const char *, const char *); void changelist_remove(prop_changelist_t *, const char *); void changelist_free(prop_changelist_t *); prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int, int); int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *); int changelist_haszonedchild(prop_changelist_t *); void remove_mountpoint(zfs_handle_t *); int create_parents(libzfs_handle_t *, char *, int); boolean_t isa_child_of(const char *dataset, const char *parent); zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **); int zvol_create_link(libzfs_handle_t *, const char *); int zvol_remove_link(libzfs_handle_t *, const char *); boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *); int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, boolean_t modifying); void namespace_clear(libzfs_handle_t *); /* * libshare (sharemgr) interfaces used internally. */ extern int zfs_init_libshare(libzfs_handle_t *, int); extern void zfs_uninit_libshare(libzfs_handle_t *); extern int zfs_parse_options(char *, zfs_share_proto_t); extern int zfs_unshare_proto(zfs_handle_t *, const char *, zfs_share_proto_t *); extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); #ifdef __cplusplus } #endif #endif /* _LIBFS_IMPL_H */ diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 3f9c711b95e2..5b2e25b78cf6 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -1,745 +1,748 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ #ifndef _SYS_DMU_H #define _SYS_DMU_H /* * This file describes the interface that the DMU provides for its * consumers. * * The DMU also interacts with the SPA. That interface is described in * dmu_spa.h. */ #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif struct page; struct vnode; struct spa; struct zilog; struct zio; struct blkptr; struct zap_cursor; struct dsl_dataset; struct dsl_pool; struct dnode; struct drr_begin; struct drr_end; struct zbookmark; struct spa; struct nvlist; struct arc_buf; struct zio_prop; struct sa_handle; typedef struct objset objset_t; typedef struct dmu_tx dmu_tx_t; typedef struct dsl_dir dsl_dir_t; typedef enum dmu_object_type { DMU_OT_NONE, /* general: */ DMU_OT_OBJECT_DIRECTORY, /* ZAP */ DMU_OT_OBJECT_ARRAY, /* UINT64 */ DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ DMU_OT_BPOBJ, /* UINT64 */ DMU_OT_BPOBJ_HDR, /* UINT64 */ /* spa: */ DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ DMU_OT_SPACE_MAP, /* UINT64 */ /* zil: */ DMU_OT_INTENT_LOG, /* UINT64 */ /* dmu: */ DMU_OT_DNODE, /* DNODE */ DMU_OT_OBJSET, /* OBJSET */ /* dsl: */ DMU_OT_DSL_DIR, /* UINT64 */ DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */ DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */ DMU_OT_DSL_PROPS, /* ZAP */ DMU_OT_DSL_DATASET, /* UINT64 */ /* zpl: */ DMU_OT_ZNODE, /* ZNODE */ DMU_OT_OLDACL, /* Old ACL */ DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ DMU_OT_MASTER_NODE, /* ZAP */ DMU_OT_UNLINKED_SET, /* ZAP */ /* zvol: */ DMU_OT_ZVOL, /* UINT8 */ DMU_OT_ZVOL_PROP, /* ZAP */ /* other; for testing only! */ DMU_OT_PLAIN_OTHER, /* UINT8 */ DMU_OT_UINT64_OTHER, /* UINT64 */ DMU_OT_ZAP_OTHER, /* ZAP */ /* new object types: */ DMU_OT_ERROR_LOG, /* ZAP */ DMU_OT_SPA_HISTORY, /* UINT8 */ DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ DMU_OT_POOL_PROPS, /* ZAP */ DMU_OT_DSL_PERMS, /* ZAP */ DMU_OT_ACL, /* ACL */ DMU_OT_SYSACL, /* SYSACL */ DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ DMU_OT_NEXT_CLONES, /* ZAP */ DMU_OT_SCAN_QUEUE, /* ZAP */ DMU_OT_USERGROUP_USED, /* ZAP */ DMU_OT_USERGROUP_QUOTA, /* ZAP */ DMU_OT_USERREFS, /* ZAP */ DMU_OT_DDT_ZAP, /* ZAP */ DMU_OT_DDT_STATS, /* ZAP */ DMU_OT_SA, /* System attr */ DMU_OT_SA_MASTER_NODE, /* ZAP */ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ DMU_OT_SCAN_XLATE, /* ZAP */ DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ DMU_OT_DEADLIST, /* ZAP */ DMU_OT_DEADLIST_HDR, /* UINT64 */ DMU_OT_DSL_CLONES, /* ZAP */ DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */ DMU_OT_NUMTYPES } dmu_object_type_t; typedef enum dmu_objset_type { DMU_OST_NONE, DMU_OST_META, DMU_OST_ZFS, DMU_OST_ZVOL, DMU_OST_OTHER, /* For testing only! */ DMU_OST_ANY, /* Be careful! */ DMU_OST_NUMTYPES } dmu_objset_type_t; void byteswap_uint64_array(void *buf, size_t size); void byteswap_uint32_array(void *buf, size_t size); void byteswap_uint16_array(void *buf, size_t size); void byteswap_uint8_array(void *buf, size_t size); void zap_byteswap(void *buf, size_t size); void zfs_oldacl_byteswap(void *buf, size_t size); void zfs_acl_byteswap(void *buf, size_t size); void zfs_znode_byteswap(void *buf, size_t size); #define DS_FIND_SNAPSHOTS (1<<0) #define DS_FIND_CHILDREN (1<<1) /* * The maximum number of bytes that can be accessed as part of one * operation, including metadata. */ #define DMU_MAX_ACCESS (10<<20) /* 10MB */ #define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */ #define DMU_USERUSED_OBJECT (-1ULL) #define DMU_GROUPUSED_OBJECT (-2ULL) #define DMU_DEADLIST_OBJECT (-3ULL) /* * artificial blkids for bonus buffer and spill blocks */ #define DMU_BONUS_BLKID (-1ULL) #define DMU_SPILL_BLKID (-2ULL) /* * Public routines to create, destroy, open, and close objsets. */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp); void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); int dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, uint64_t flags); int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); +int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *); int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); int dmu_objset_rename(const char *name, const char *newname, boolean_t recursive); int dmu_objset_find(char *name, int func(const char *, void *), void *arg, int flags); void dmu_objset_byteswap(void *buf, size_t size); typedef struct dmu_buf { uint64_t db_object; /* object that this buffer is part of */ uint64_t db_offset; /* byte offset in this object */ uint64_t db_size; /* size of buffer in bytes */ void *db_data; /* data in buffer */ } dmu_buf_t; typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr); /* * The names of zap entries in the DIRECTORY_OBJECT of the MOS. */ #define DMU_POOL_DIRECTORY_OBJECT 1 #define DMU_POOL_CONFIG "config" #define DMU_POOL_ROOT_DATASET "root_dataset" #define DMU_POOL_SYNC_BPOBJ "sync_bplist" #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" #define DMU_POOL_ERRLOG_LAST "errlog_last" #define DMU_POOL_SPARES "spares" #define DMU_POOL_DEFLATE "deflate" #define DMU_POOL_HISTORY "history" #define DMU_POOL_PROPS "pool_props" #define DMU_POOL_L2CACHE "l2cache" #define DMU_POOL_TMP_USERREFS "tmp_userrefs" #define DMU_POOL_DDT "DDT-%s-%s-%s" #define DMU_POOL_DDT_STATS "DDT-statistics" #define DMU_POOL_CREATION_VERSION "creation_version" #define DMU_POOL_SCAN "scan" #define DMU_POOL_FREE_BPOBJ "free_bpobj" /* * Allocate an object from this objset. The range of object numbers * available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode. * * The transaction must be assigned to a txg. The newly allocated * object will be "held" in the transaction (ie. you can modify the * newly allocated object in this transaction). * * dmu_object_alloc() chooses an object and returns it in *objectp. * * dmu_object_claim() allocates a specific object number. If that * number is already allocated, it fails and returns EEXIST. * * Return 0 on success, or ENOSPC or EEXIST as specified above. */ uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen); /* * Free an object from this objset. * * The object's data will be freed as well (ie. you don't need to call * dmu_free(object, 0, -1, tx)). * * The object need not be held in the transaction. * * If there are any holds on this object's buffers (via dmu_buf_hold()), * or tx holds on the object (via dmu_tx_hold_object()), you can not * free it; it fails and returns EBUSY. * * If the object is not allocated, it fails and returns ENOENT. * * Return 0 on success, or EBUSY or ENOENT as specified above. */ int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx); /* * Find the next allocated or free object. * * The objectp parameter is in-out. It will be updated to be the next * object which is allocated. Ignore objects which have not been * modified since txg. * * XXX Can only be called on a objset with no dirty data. * * Returns 0 on success, or ENOENT if there are no more objects. */ int dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg); /* * Set the data blocksize for an object. * * The object cannot have any blocks allcated beyond the first. If * the first block is allocated already, the new size must be greater * than the current block size. If these conditions are not met, * ENOTSUP will be returned. * * Returns 0 on success, or EBUSY if there are any holds on the object * contents, or ENOTSUP as described above. */ int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_tx_t *tx); /* * Set the checksum property on a dnode. The new checksum algorithm will * apply to all newly written blocks; existing blocks will not be affected. */ void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, dmu_tx_t *tx); /* * Set the compress property on a dnode. The new compression algorithm will * apply to all newly written blocks; existing blocks will not be affected. */ void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, dmu_tx_t *tx); /* * Decide how to write a block: checksum, compression, number of copies, etc. */ #define WP_NOFILL 0x1 #define WP_DMU_SYNC 0x2 #define WP_SPILL 0x4 void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp, struct zio_prop *zp); /* * The bonus data is accessed more or less like a regular buffer. * You must dmu_bonus_hold() to get the buffer, which will give you a * dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus * data. As with any normal buffer, you must call dmu_buf_read() to * read db_data, dmu_buf_will_dirty() before modifying it, and the * object must be held in an assigned transaction before calling * dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus * buffer as well. You must release your hold with dmu_buf_rele(). */ int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); int dmu_bonus_max(void); int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *); dmu_object_type_t dmu_get_bonustype(dmu_buf_t *); int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *); /* * Special spill buffer support used by "SA" framework */ int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags, void *tag, dmu_buf_t **dbp); int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); /* * Obtain the DMU buffer from the specified object which contains the * specified offset. dmu_buf_hold() puts a "hold" on the buffer, so * that it will remain in memory. You must release the hold with * dmu_buf_rele(). You musn't access the dmu_buf_t after releasing your * hold. You must have a hold on any dmu_buf_t* you pass to the DMU. * * You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill * on the returned buffer before reading or writing the buffer's * db_data. The comments for those routines describe what particular * operations are valid after calling them. * * The object number must be a valid, allocated object number. */ int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, void *tag, dmu_buf_t **, int flags); void dmu_buf_add_ref(dmu_buf_t *db, void* tag); void dmu_buf_rele(dmu_buf_t *db, void *tag); uint64_t dmu_buf_refcount(dmu_buf_t *db); /* * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a * range of an object. A pointer to an array of dmu_buf_t*'s is * returned (in *dbpp). * * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and * frees the array. The hold on the array of buffers MUST be released * with dmu_buf_rele_array. You can NOT release the hold on each buffer * individually with dmu_buf_rele. */ int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp); void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag); /* * Returns NULL on success, or the existing user ptr if it's already * been set. * * user_ptr is for use by the user and can be obtained via dmu_buf_get_user(). * * user_data_ptr_ptr should be NULL, or a pointer to a pointer which * will be set to db->db_data when you are allowed to access it. Note * that db->db_data (the pointer) can change when you do dmu_buf_read(), * dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill(). * *user_data_ptr_ptr will be set to the new value when it changes. * * If non-NULL, pageout func will be called when this buffer is being * excised from the cache, so that you can clean up the data structure * pointed to by user_ptr. * * dmu_evict_user() will call the pageout func for all buffers in a * objset with a given pageout func. */ void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); /* * set_user_ie is the same as set_user, but request immediate eviction * when hold count goes to zero. */ void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func); /* * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set. */ void *dmu_buf_get_user(dmu_buf_t *db); /* * Indicate that you are going to modify the buffer's data (db_data). * * The transaction (tx) must be assigned to a txg (ie. you've called * dmu_tx_assign()). The buffer's object must be held in the tx * (ie. you've called dmu_tx_hold_object(tx, db->db_object)). */ void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); /* * Tells if the given dbuf is freeable. */ boolean_t dmu_buf_freeable(dmu_buf_t *); /* * You must create a transaction, then hold the objects which you will * (or might) modify as part of this transaction. Then you must assign * the transaction to a transaction group. Once the transaction has * been assigned, you can modify buffers which belong to held objects as * part of this transaction. You can't modify buffers before the * transaction has been assigned; you can't modify buffers which don't * belong to objects which this transaction holds; you can't hold * objects once the transaction has been assigned. You may hold an * object which you are going to free (with dmu_object_free()), but you * don't have to. * * You can abort the transaction before it has been assigned. * * Note that you may hold buffers (with dmu_buf_hold) at any time, * regardless of transaction state. */ #define DMU_NEW_OBJECT (-1ULL) #define DMU_OBJECT_END (-1ULL) dmu_tx_t *dmu_tx_create(objset_t *os); void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len); void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len); void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name); void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object); void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object); void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow); void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size); void dmu_tx_abort(dmu_tx_t *tx); int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); void dmu_tx_wait(dmu_tx_t *tx); void dmu_tx_commit(dmu_tx_t *tx); /* * To register a commit callback, dmu_tx_callback_register() must be called. * * dcb_data is a pointer to caller private data that is passed on as a * callback parameter. The caller is responsible for properly allocating and * freeing it. * * When registering a callback, the transaction must be already created, but * it cannot be committed or aborted. It can be assigned to a txg or not. * * The callback will be called after the transaction has been safely written * to stable storage and will also be called if the dmu_tx is aborted. * If there is any error which prevents the transaction from being committed to * disk, the callback will be called with a value of error != 0. */ typedef void dmu_tx_callback_func_t(void *dcb_data, int error); void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, void *dcb_data); /* * Free up the data blocks for a defined range of a file. If size is * zero, the range from offset to end-of-file is freed. */ int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t size); int dmu_free_object(objset_t *os, uint64_t object); /* * Convenience functions. * * Canfail routines will return 0 on success, or an errno if there is a * nonrecoverable I/O error. */ #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf, uint32_t flags); void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx); void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); #ifdef _KERNEL #include int dmu_read_req(objset_t *os, uint64_t object, struct request *req); int dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx); int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, dmu_tx_t *tx); int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size, dmu_tx_t *tx); #endif struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); void dmu_return_arcbuf(struct arc_buf *buf); void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, dmu_tx_t *tx); int dmu_xuio_init(struct xuio *uio, int niov); void dmu_xuio_fini(struct xuio *uio); int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off, size_t n); int dmu_xuio_cnt(struct xuio *uio); struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i); void dmu_xuio_clear(struct xuio *uio, int i); void xuio_stat_wbuf_copied(void); void xuio_stat_wbuf_nocopy(void); extern int zfs_prefetch_disable; /* * Asynchronously try to read in the data. */ void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len); typedef struct dmu_object_info { /* All sizes are in bytes unless otherwise indicated. */ uint32_t doi_data_block_size; uint32_t doi_metadata_block_size; dmu_object_type_t doi_type; dmu_object_type_t doi_bonus_type; uint64_t doi_bonus_size; uint8_t doi_indirection; /* 2 = dnode->indirect->data */ uint8_t doi_checksum; uint8_t doi_compress; uint8_t doi_pad[5]; uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */ uint64_t doi_max_offset; uint64_t doi_fill_count; /* number of non-empty blocks */ } dmu_object_info_t; typedef void arc_byteswap_func_t(void *buf, size_t size); typedef struct dmu_object_type_info { arc_byteswap_func_t *ot_byteswap; boolean_t ot_metadata; char *ot_name; } dmu_object_type_info_t; extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES]; /* * Get information on a DMU object. * * Return 0 on success or ENOENT if object is not allocated. * * If doi is NULL, just indicates whether the object exists. */ int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi); void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi); void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi); void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512); typedef struct dmu_objset_stats { uint64_t dds_num_clones; /* number of clones of this */ uint64_t dds_creation_txg; uint64_t dds_guid; dmu_objset_type_t dds_type; uint8_t dds_is_snapshot; uint8_t dds_inconsistent; char dds_origin[MAXNAMELEN]; } dmu_objset_stats_t; /* * Get stats on a dataset. */ void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); /* * Add entries to the nvlist for all the objset's properties. See * zfs_prop_table[] and zfs(1m) for details on the properties. */ void dmu_objset_stats(objset_t *os, struct nvlist *nv); /* * Get the space usage statistics for statvfs(). * * refdbytes is the amount of space "referenced" by this objset. * availbytes is the amount of space available to this objset, taking * into account quotas & reservations, assuming that no other objsets * use the space first. These values correspond to the 'referenced' and * 'available' properties, described in the zfs(1m) manpage. * * usedobjs and availobjs are the number of objects currently allocated, * and available. */ void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); /* * The fsid_guid is a 56-bit ID that can change to avoid collisions. * (Contrast with the ds_guid which is a 64-bit ID that will never * change, so there is a small probability that it will collide.) */ uint64_t dmu_objset_fsid_guid(objset_t *os); /* * Get the [cm]time for an objset's snapshot dir */ timestruc_t dmu_objset_snap_cmtime(objset_t *os); int dmu_objset_is_snapshot(objset_t *os); extern struct spa *dmu_objset_spa(objset_t *os); extern struct zilog *dmu_objset_zil(objset_t *os); extern struct dsl_pool *dmu_objset_pool(objset_t *os); extern struct dsl_dataset *dmu_objset_ds(objset_t *os); extern void dmu_objset_name(objset_t *os, char *buf); extern dmu_objset_type_t dmu_objset_type(objset_t *os); extern uint64_t dmu_objset_id(objset_t *os); extern uint64_t dmu_objset_syncprop(objset_t *os); extern uint64_t dmu_objset_logbias(objset_t *os); extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name, uint64_t *id, uint64_t *offp, boolean_t *case_conflict); extern int dmu_snapshot_id(objset_t *os, const char *snapname, uint64_t *idp); extern int dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, boolean_t *conflict); extern int dmu_dir_list_next(objset_t *os, int namelen, char *name, uint64_t *idp, uint64_t *offp); typedef int objset_used_cb_t(dmu_object_type_t bonustype, void *bonus, uint64_t *userp, uint64_t *groupp); extern void dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb); extern void dmu_objset_set_user(objset_t *os, void *user_ptr); extern void *dmu_objset_get_user(objset_t *os); /* * Return the txg number for the given assigned transaction. */ uint64_t dmu_tx_get_txg(dmu_tx_t *tx); /* * Synchronous write. * If a parent zio is provided this function initiates a write on the * provided buffer as a child of the parent zio. * In the absence of a parent zio, the write is completed synchronously. * At write completion, blk is filled with the bp of the written block. * Note that while the data covered by this function will be on stable * storage when the write completes this new data does not become a * permanent part of the file until the associated transaction commits. */ /* * {zfs,zvol,ztest}_get_done() args */ typedef struct zgd { struct zilog *zgd_zilog; struct blkptr *zgd_bp; dmu_buf_t *zgd_db; struct rl *zgd_rl; void *zgd_private; } zgd_t; typedef void dmu_sync_cb_t(zgd_t *arg, int error); int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd); /* * Find the next hole or data block in file starting at *off * Return found offset in *off. Return ESRCH for end of file. */ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off); /* * Initial setup and final teardown. */ extern void dmu_init(void); extern void dmu_fini(void); typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, uint64_t object, uint64_t offset, int len); void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, struct vnode *vp, offset_t *off); +int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign, + uint64_t *sizep); typedef struct dmu_recv_cookie { /* * This structure is opaque! * * If logical and real are different, we are recving the stream * into the "real" temporary clone, and then switching it with * the "logical" target. */ struct dsl_dataset *drc_logical_ds; struct dsl_dataset *drc_real_ds; struct drr_begin *drc_drrb; char *drc_tosnap; char *drc_top_ds; boolean_t drc_newfs; boolean_t drc_force; struct avl_tree *drc_guid_to_ds_map; } dmu_recv_cookie_t; int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *, boolean_t force, objset_t *origin, dmu_recv_cookie_t *); int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, int cleanup_fd, uint64_t *action_handlep); int dmu_recv_end(dmu_recv_cookie_t *drc); int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *off); /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ extern uint64_t zfs_crc64_table[256]; #ifdef __cplusplus } #endif #endif /* _SYS_DMU_H */ diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index 22733d070e8b..c4530a8f0ae7 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -1,283 +1,288 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_DATASET_H #define _SYS_DSL_DATASET_H #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif struct dsl_dataset; struct dsl_dir; struct dsl_pool; #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) /* * NB: nopromote can not yet be set, but we want support for it in this * on-disk version, so that we don't need to upgrade for it later. It * will be needed when we implement 'zfs split' (where the split off * clone should not be promoted). */ #define DS_FLAG_NOPROMOTE (1ULL<<1) /* * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE, * refquota/refreservations). */ #define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2) /* * DS_FLAG_DEFER_DESTROY is set after 'zfs destroy -d' has been called * on a dataset. This allows the dataset to be destroyed using 'zfs release'. */ #define DS_FLAG_DEFER_DESTROY (1ULL<<3) #define DS_IS_DEFER_DESTROY(ds) \ ((ds)->ds_phys->ds_flags & DS_FLAG_DEFER_DESTROY) /* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose * name lookups should be performed case-insensitively. */ #define DS_FLAG_CI_DATASET (1ULL<<16) typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */ uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */ uint64_t ds_prev_snap_txg; uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */ uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */ uint64_t ds_num_children; /* clone/snap children; ==0 for head */ uint64_t ds_creation_time; /* seconds since 1970 */ uint64_t ds_creation_txg; uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */ uint64_t ds_used_bytes; uint64_t ds_compressed_bytes; uint64_t ds_uncompressed_bytes; uint64_t ds_unique_bytes; /* only relevant to snapshots */ /* * The ds_fsid_guid is a 56-bit ID that can change to avoid * collisions. The ds_guid is a 64-bit ID that will never * change, so there is a small probability that it will collide. */ uint64_t ds_fsid_guid; uint64_t ds_guid; uint64_t ds_flags; /* DS_FLAG_* */ blkptr_t ds_bp; uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */ uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */ uint64_t ds_userrefs_obj; /* DMU_OT_USERREFS */ uint64_t ds_pad[5]; /* pad out to 320 bytes for good measure */ } dsl_dataset_phys_t; typedef struct dsl_dataset { /* Immutable: */ struct dsl_dir *ds_dir; dsl_dataset_phys_t *ds_phys; dmu_buf_t *ds_dbuf; uint64_t ds_object; uint64_t ds_fsid_guid; /* only used in syncing context, only valid for non-snapshots: */ struct dsl_dataset *ds_prev; /* has internal locking: */ dsl_deadlist_t ds_deadlist; bplist_t ds_pending_deadlist; /* to protect against multiple concurrent incremental recv */ kmutex_t ds_recvlock; /* protected by lock on pool's dp_dirty_datasets list */ txg_node_t ds_dirty_link; list_node_t ds_synced_link; /* * ds_phys->ds_ is also protected by ds_lock. * Protected by ds_lock: */ kmutex_t ds_lock; objset_t *ds_objset; uint64_t ds_userrefs; /* * ds_owner is protected by the ds_rwlock and the ds_lock */ krwlock_t ds_rwlock; kcondvar_t ds_exclusive_cv; void *ds_owner; /* no locking; only for making guesses */ uint64_t ds_trysnap_txg; /* for objset_open() */ kmutex_t ds_opening_lock; uint64_t ds_reserved; /* cached refreservation */ uint64_t ds_quota; /* cached refquota */ /* Protected by ds_lock; keep at end of struct for better locality */ char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; struct dsl_ds_destroyarg { dsl_dataset_t *ds; /* ds to destroy */ dsl_dataset_t *rm_origin; /* also remove our origin? */ boolean_t is_origin_rm; /* set if removing origin snap */ boolean_t defer; /* destroy -d requested? */ boolean_t releasing; /* destroying due to release? */ boolean_t need_prep; /* do we need to retry due to EBUSY? */ }; /* * The max length of a temporary tag prefix is the number of hex digits * required to express UINT64_MAX plus one for the hyphen. */ #define MAX_TAG_PREFIX_LEN 17 struct dsl_ds_holdarg { dsl_sync_task_group_t *dstg; char *htag; char *snapname; boolean_t recursive; boolean_t gotone; boolean_t temphold; char failed[MAXPATHLEN]; }; #define dsl_dataset_is_snapshot(ds) \ ((ds)->ds_phys->ds_num_children != 0) #define DS_UNIQUE_IS_ACCURATE(ds) \ (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp); int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, dsl_dataset_t **); int dsl_dataset_own(const char *name, boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); void dsl_dataset_name(dsl_dataset_t *ds, char *name); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag); void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag); void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, minor_t minor); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx); int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); dsl_checkfunc_t dsl_dataset_destroy_check; dsl_syncfunc_t dsl_dataset_destroy_sync; dsl_checkfunc_t dsl_dataset_snapshot_check; dsl_syncfunc_t dsl_dataset_snapshot_sync; dsl_syncfunc_t dsl_dataset_user_hold_sync; int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); int dsl_dataset_promote(const char *name, char *conflsnap); int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force); int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, boolean_t recursive, boolean_t temphold, int cleanup_fd); int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, boolean_t temphold); int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, boolean_t recursive); int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, char *htag, boolean_t retry); int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp); blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds); void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx); spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds); boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds); void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx); void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx); int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, boolean_t async); boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, uint64_t blk_birth); uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds); void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx); void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv); void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat); void dsl_dataset_space(dsl_dataset_t *ds, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds); +int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); +int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv); int dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota); dsl_syncfunc_t dsl_dataset_set_quota_sync; int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, uint64_t reservation); int dsl_destroy_inconsistent(const char *dsname, void *arg); #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \ dsl_dataset_name(ds, __ds_name); \ dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \ kmem_free(__ds_name, MAXNAMELEN); \ } \ _NOTE(CONSTCOND) } while (0) #else #define dprintf_ds(dd, fmt, ...) #endif #ifdef __cplusplus } #endif #endif /* _SYS_DSL_DATASET_H */ diff --git a/include/sys/dsl_deleg.h b/include/sys/dsl_deleg.h index 73c43bd23879..9db6d07e87e7 100644 --- a/include/sys/dsl_deleg.h +++ b/include/sys/dsl_deleg.h @@ -1,78 +1,80 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_DELEG_H #define _SYS_DSL_DELEG_H #include #include #include #ifdef __cplusplus extern "C" { #endif #define ZFS_DELEG_PERM_NONE "" #define ZFS_DELEG_PERM_CREATE "create" #define ZFS_DELEG_PERM_DESTROY "destroy" #define ZFS_DELEG_PERM_SNAPSHOT "snapshot" #define ZFS_DELEG_PERM_ROLLBACK "rollback" #define ZFS_DELEG_PERM_CLONE "clone" #define ZFS_DELEG_PERM_PROMOTE "promote" #define ZFS_DELEG_PERM_RENAME "rename" #define ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_ALLOW "allow" #define ZFS_DELEG_PERM_USERPROP "userprop" #define ZFS_DELEG_PERM_VSCAN "vscan" #define ZFS_DELEG_PERM_USERQUOTA "userquota" #define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" #define ZFS_DELEG_PERM_USERUSED "userused" #define ZFS_DELEG_PERM_GROUPUSED "groupused" #define ZFS_DELEG_PERM_HOLD "hold" #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" /* * Note: the names of properties that are marked delegatable are also * valid delegated permissions */ int dsl_deleg_get(const char *ddname, nvlist_t **nvp); int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); -int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr); +int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent, + const char *perm, cred_t *cr); void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx); boolean_t dsl_delegation_on(objset_t *os); #ifdef __cplusplus } #endif #endif /* _SYS_DSL_DELEG_H */ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 032a952afdf8..86d7ad515313 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1,933 +1,938 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ #ifndef _SYS_FS_ZFS_H #define _SYS_FS_ZFS_H #include #ifdef __cplusplus extern "C" { #endif /* * Types and constants shared between userland and the kernel. */ /* * Each dataset can be one of the following types. These constants can be * combined into masks that can be passed to various functions. */ typedef enum { ZFS_TYPE_FILESYSTEM = 0x1, ZFS_TYPE_SNAPSHOT = 0x2, ZFS_TYPE_VOLUME = 0x4, ZFS_TYPE_POOL = 0x8 } zfs_type_t; #define ZFS_TYPE_DATASET \ (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) #define ZAP_MAXNAMELEN 256 #define ZAP_MAXVALUELEN (1024 * 8) #define ZAP_OLDMAXVALUELEN 1024 /* * Dataset properties are identified by these constants and must be added to * the end of this list to ensure that external consumers are not affected * by the change. If you make any changes to this list, be sure to update * the property table in usr/src/common/zfs/zfs_prop.c. */ typedef enum { ZFS_PROP_TYPE, ZFS_PROP_CREATION, ZFS_PROP_USED, ZFS_PROP_AVAILABLE, ZFS_PROP_REFERENCED, ZFS_PROP_COMPRESSRATIO, ZFS_PROP_MOUNTED, ZFS_PROP_ORIGIN, ZFS_PROP_QUOTA, ZFS_PROP_RESERVATION, ZFS_PROP_VOLSIZE, ZFS_PROP_VOLBLOCKSIZE, ZFS_PROP_RECORDSIZE, ZFS_PROP_MOUNTPOINT, ZFS_PROP_SHARENFS, ZFS_PROP_CHECKSUM, ZFS_PROP_COMPRESSION, ZFS_PROP_ATIME, ZFS_PROP_DEVICES, ZFS_PROP_EXEC, ZFS_PROP_SETUID, ZFS_PROP_READONLY, ZFS_PROP_ZONED, ZFS_PROP_SNAPDIR, ZFS_PROP_PRIVATE, /* not exposed to user, temporary */ ZFS_PROP_ACLINHERIT, ZFS_PROP_CREATETXG, /* not exposed to the user */ ZFS_PROP_NAME, /* not exposed to the user */ ZFS_PROP_CANMOUNT, ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */ ZFS_PROP_XATTR, ZFS_PROP_NUMCLONES, /* not exposed to the user */ ZFS_PROP_COPIES, ZFS_PROP_VERSION, ZFS_PROP_UTF8ONLY, ZFS_PROP_NORMALIZE, ZFS_PROP_CASE, ZFS_PROP_VSCAN, ZFS_PROP_NBMAND, ZFS_PROP_SHARESMB, ZFS_PROP_REFQUOTA, ZFS_PROP_REFRESERVATION, ZFS_PROP_GUID, ZFS_PROP_PRIMARYCACHE, ZFS_PROP_SECONDARYCACHE, ZFS_PROP_USEDSNAP, ZFS_PROP_USEDDS, ZFS_PROP_USEDCHILD, ZFS_PROP_USEDREFRESERV, ZFS_PROP_USERACCOUNTING, /* not exposed to the user */ ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */ ZFS_PROP_DEFER_DESTROY, ZFS_PROP_USERREFS, ZFS_PROP_LOGBIAS, ZFS_PROP_UNIQUE, /* not exposed to the user */ ZFS_PROP_OBJSETID, /* not exposed to the user */ ZFS_PROP_DEDUP, ZFS_PROP_MLSLABEL, ZFS_PROP_SYNC, ZFS_PROP_REFRATIO, + ZFS_PROP_WRITTEN, + ZFS_PROP_CLONES, ZFS_NUM_PROPS } zfs_prop_t; typedef enum { ZFS_PROP_USERUSED, ZFS_PROP_USERQUOTA, ZFS_PROP_GROUPUSED, ZFS_PROP_GROUPQUOTA, ZFS_NUM_USERQUOTA_PROPS } zfs_userquota_prop_t; extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS]; /* * Pool properties are identified by these constants and must be added to the * end of this list to ensure that external consumers are not affected * by the change. If you make any changes to this list, be sure to update * the property table in usr/src/common/zfs/zpool_prop.c. */ typedef enum { ZPOOL_PROP_NAME, ZPOOL_PROP_SIZE, ZPOOL_PROP_CAPACITY, ZPOOL_PROP_ALTROOT, ZPOOL_PROP_HEALTH, ZPOOL_PROP_GUID, ZPOOL_PROP_VERSION, ZPOOL_PROP_BOOTFS, ZPOOL_PROP_DELEGATION, ZPOOL_PROP_AUTOREPLACE, ZPOOL_PROP_CACHEFILE, ZPOOL_PROP_FAILUREMODE, ZPOOL_PROP_LISTSNAPS, ZPOOL_PROP_AUTOEXPAND, ZPOOL_PROP_DEDUPDITTO, ZPOOL_PROP_DEDUPRATIO, ZPOOL_PROP_FREE, ZPOOL_PROP_ALLOCATED, ZPOOL_PROP_READONLY, ZPOOL_PROP_ASHIFT, ZPOOL_NUM_PROPS } zpool_prop_t; #define ZPROP_CONT -2 #define ZPROP_INVAL -1 #define ZPROP_VALUE "value" #define ZPROP_SOURCE "source" typedef enum { ZPROP_SRC_NONE = 0x1, ZPROP_SRC_DEFAULT = 0x2, ZPROP_SRC_TEMPORARY = 0x4, ZPROP_SRC_LOCAL = 0x8, ZPROP_SRC_INHERITED = 0x10, ZPROP_SRC_RECEIVED = 0x20 } zprop_source_t; #define ZPROP_SRC_ALL 0x3f #define ZPROP_SOURCE_VAL_RECVD "$recvd" #define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS" /* * Dataset flag implemented as a special entry in the props zap object * indicating that the dataset has received properties on or after * SPA_VERSION_RECVD_PROPS. The first such receive blows away local properties * just as it did in earlier versions, and thereafter, local properties are * preserved. */ #define ZPROP_HAS_RECVD "$hasrecvd" typedef enum { ZPROP_ERR_NOCLEAR = 0x1, /* failure to clear existing props */ ZPROP_ERR_NORESTORE = 0x2 /* failure to restore props on error */ } zprop_errflags_t; typedef int (*zprop_func)(int, void *); /* * Properties to be set on the root file system of a new pool * are stuffed into their own nvlist, which is then included in * the properties nvlist with the pool properties. */ #define ZPOOL_ROOTFS_PROPS "root-props-nvl" /* * Dataset property functions shared between libzfs and kernel. */ const char *zfs_prop_default_string(zfs_prop_t); uint64_t zfs_prop_default_numeric(zfs_prop_t); boolean_t zfs_prop_readonly(zfs_prop_t); boolean_t zfs_prop_inheritable(zfs_prop_t); boolean_t zfs_prop_setonce(zfs_prop_t); const char *zfs_prop_to_name(zfs_prop_t); zfs_prop_t zfs_name_to_prop(const char *); boolean_t zfs_prop_user(const char *); boolean_t zfs_prop_userquota(const char *); +boolean_t zfs_prop_written(const char *); int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **); int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *); uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed); boolean_t zfs_prop_valid_for_type(int, zfs_type_t); /* * Pool property functions shared between libzfs and kernel. */ zpool_prop_t zpool_name_to_prop(const char *); const char *zpool_prop_to_name(zpool_prop_t); const char *zpool_prop_default_string(zpool_prop_t); uint64_t zpool_prop_default_numeric(zpool_prop_t); boolean_t zpool_prop_readonly(zpool_prop_t); int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **); int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *); uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed); /* * Definitions for the Delegation. */ typedef enum { ZFS_DELEG_WHO_UNKNOWN = 0, ZFS_DELEG_USER = 'u', ZFS_DELEG_USER_SETS = 'U', ZFS_DELEG_GROUP = 'g', ZFS_DELEG_GROUP_SETS = 'G', ZFS_DELEG_EVERYONE = 'e', ZFS_DELEG_EVERYONE_SETS = 'E', ZFS_DELEG_CREATE = 'c', ZFS_DELEG_CREATE_SETS = 'C', ZFS_DELEG_NAMED_SET = 's', ZFS_DELEG_NAMED_SET_SETS = 'S' } zfs_deleg_who_type_t; typedef enum { ZFS_DELEG_NONE = 0, ZFS_DELEG_PERM_LOCAL = 1, ZFS_DELEG_PERM_DESCENDENT = 2, ZFS_DELEG_PERM_LOCALDESCENDENT = 3, ZFS_DELEG_PERM_CREATE = 4 } zfs_deleg_inherit_t; #define ZFS_DELEG_PERM_UID "uid" #define ZFS_DELEG_PERM_GID "gid" #define ZFS_DELEG_PERM_GROUPS "groups" #define ZFS_MLSLABEL_DEFAULT "none" #define ZFS_SMB_ACL_SRC "src" #define ZFS_SMB_ACL_TARGET "target" typedef enum { ZFS_CANMOUNT_OFF = 0, ZFS_CANMOUNT_ON = 1, ZFS_CANMOUNT_NOAUTO = 2 } zfs_canmount_type_t; typedef enum { ZFS_LOGBIAS_LATENCY = 0, ZFS_LOGBIAS_THROUGHPUT = 1 } zfs_logbias_op_t; typedef enum zfs_share_op { ZFS_SHARE_NFS = 0, ZFS_UNSHARE_NFS = 1, ZFS_SHARE_SMB = 2, ZFS_UNSHARE_SMB = 3 } zfs_share_op_t; typedef enum zfs_smb_acl_op { ZFS_SMB_ACL_ADD, ZFS_SMB_ACL_REMOVE, ZFS_SMB_ACL_RENAME, ZFS_SMB_ACL_PURGE } zfs_smb_acl_op_t; typedef enum zfs_cache_type { ZFS_CACHE_NONE = 0, ZFS_CACHE_METADATA = 1, ZFS_CACHE_ALL = 2 } zfs_cache_type_t; typedef enum { ZFS_SYNC_STANDARD = 0, ZFS_SYNC_ALWAYS = 1, ZFS_SYNC_DISABLED = 2 } zfs_sync_type_t; typedef enum { ZFS_XATTR_OFF = 0, ZFS_XATTR_DIR = 1, ZFS_XATTR_SA = 2 } zfs_xattr_type_t; /* * On-disk version number. */ #define SPA_VERSION_1 1ULL #define SPA_VERSION_2 2ULL #define SPA_VERSION_3 3ULL #define SPA_VERSION_4 4ULL #define SPA_VERSION_5 5ULL #define SPA_VERSION_6 6ULL #define SPA_VERSION_7 7ULL #define SPA_VERSION_8 8ULL #define SPA_VERSION_9 9ULL #define SPA_VERSION_10 10ULL #define SPA_VERSION_11 11ULL #define SPA_VERSION_12 12ULL #define SPA_VERSION_13 13ULL #define SPA_VERSION_14 14ULL #define SPA_VERSION_15 15ULL #define SPA_VERSION_16 16ULL #define SPA_VERSION_17 17ULL #define SPA_VERSION_18 18ULL #define SPA_VERSION_19 19ULL #define SPA_VERSION_20 20ULL #define SPA_VERSION_21 21ULL #define SPA_VERSION_22 22ULL #define SPA_VERSION_23 23ULL #define SPA_VERSION_24 24ULL #define SPA_VERSION_25 25ULL #define SPA_VERSION_26 26ULL #define SPA_VERSION_27 27ULL #define SPA_VERSION_28 28ULL /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. Also bump the version number in * usr/src/grub/capability. */ #define SPA_VERSION SPA_VERSION_28 #define SPA_VERSION_STRING "28" /* * Symbolic names for the changes that caused a SPA_VERSION switch. * Used in the code when checking for presence or absence of a feature. * Feel free to define multiple symbolic names for each version if there * were multiple changes to on-disk structures during that version. * * NOTE: When checking the current SPA_VERSION in your code, be sure * to use spa_version() since it reports the version of the * last synced uberblock. Checking the in-flight version can * be dangerous in some cases. */ #define SPA_VERSION_INITIAL SPA_VERSION_1 #define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2 #define SPA_VERSION_SPARES SPA_VERSION_3 #define SPA_VERSION_RAIDZ2 SPA_VERSION_3 #define SPA_VERSION_BPOBJ_ACCOUNT SPA_VERSION_3 #define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3 #define SPA_VERSION_DNODE_BYTES SPA_VERSION_3 #define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4 #define SPA_VERSION_GZIP_COMPRESSION SPA_VERSION_5 #define SPA_VERSION_BOOTFS SPA_VERSION_6 #define SPA_VERSION_SLOGS SPA_VERSION_7 #define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8 #define SPA_VERSION_FUID SPA_VERSION_9 #define SPA_VERSION_REFRESERVATION SPA_VERSION_9 #define SPA_VERSION_REFQUOTA SPA_VERSION_9 #define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9 #define SPA_VERSION_L2CACHE SPA_VERSION_10 #define SPA_VERSION_NEXT_CLONES SPA_VERSION_11 #define SPA_VERSION_ORIGIN SPA_VERSION_11 #define SPA_VERSION_DSL_SCRUB SPA_VERSION_11 #define SPA_VERSION_SNAP_PROPS SPA_VERSION_12 #define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13 #define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14 #define SPA_VERSION_USERSPACE SPA_VERSION_15 #define SPA_VERSION_STMF_PROP SPA_VERSION_16 #define SPA_VERSION_RAIDZ3 SPA_VERSION_17 #define SPA_VERSION_USERREFS SPA_VERSION_18 #define SPA_VERSION_HOLES SPA_VERSION_19 #define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20 #define SPA_VERSION_DEDUP SPA_VERSION_21 #define SPA_VERSION_RECVD_PROPS SPA_VERSION_22 #define SPA_VERSION_SLIM_ZIL SPA_VERSION_23 #define SPA_VERSION_SA SPA_VERSION_24 #define SPA_VERSION_SCAN SPA_VERSION_25 #define SPA_VERSION_DIR_CLONES SPA_VERSION_26 #define SPA_VERSION_DEADLISTS SPA_VERSION_26 #define SPA_VERSION_FAST_SNAP SPA_VERSION_27 #define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 /* * ZPL version - rev'd whenever an incompatible on-disk format change * occurs. This is independent of SPA/DMU/ZAP versioning. You must * also update the version_table[] and help message in zfs_prop.c. * * When changing, be sure to teach GRUB how to read the new format! * See usr/src/grub/grub-0.97/stage2/{zfs-include/,fsys_zfs*} */ #define ZPL_VERSION_1 1ULL #define ZPL_VERSION_2 2ULL #define ZPL_VERSION_3 3ULL #define ZPL_VERSION_4 4ULL #define ZPL_VERSION_5 5ULL #define ZPL_VERSION ZPL_VERSION_5 #define ZPL_VERSION_STRING "5" #define ZPL_VERSION_INITIAL ZPL_VERSION_1 #define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2 #define ZPL_VERSION_FUID ZPL_VERSION_3 #define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3 #define ZPL_VERSION_SYSATTR ZPL_VERSION_3 #define ZPL_VERSION_USERSPACE ZPL_VERSION_4 #define ZPL_VERSION_SA ZPL_VERSION_5 /* Rewind request information */ #define ZPOOL_NO_REWIND 1 /* No policy - default behavior */ #define ZPOOL_NEVER_REWIND 2 /* Do not search for best txg or rewind */ #define ZPOOL_TRY_REWIND 4 /* Search for best txg, but do not rewind */ #define ZPOOL_DO_REWIND 8 /* Rewind to best txg w/in deferred frees */ #define ZPOOL_EXTREME_REWIND 16 /* Allow extreme measures to find best txg */ #define ZPOOL_REWIND_MASK 28 /* All the possible rewind bits */ #define ZPOOL_REWIND_POLICIES 31 /* All the possible policy bits */ typedef struct zpool_rewind_policy { uint32_t zrp_request; /* rewind behavior requested */ uint64_t zrp_maxmeta; /* max acceptable meta-data errors */ uint64_t zrp_maxdata; /* max acceptable data errors */ uint64_t zrp_txg; /* specific txg to load */ } zpool_rewind_policy_t; /* * The following are configuration names used in the nvlist describing a pool's * configuration. */ #define ZPOOL_CONFIG_VERSION "version" #define ZPOOL_CONFIG_POOL_NAME "name" #define ZPOOL_CONFIG_POOL_STATE "state" #define ZPOOL_CONFIG_POOL_TXG "txg" #define ZPOOL_CONFIG_POOL_GUID "pool_guid" #define ZPOOL_CONFIG_CREATE_TXG "create_txg" #define ZPOOL_CONFIG_TOP_GUID "top_guid" #define ZPOOL_CONFIG_VDEV_TREE "vdev_tree" #define ZPOOL_CONFIG_TYPE "type" #define ZPOOL_CONFIG_CHILDREN "children" #define ZPOOL_CONFIG_ID "id" #define ZPOOL_CONFIG_GUID "guid" #define ZPOOL_CONFIG_PATH "path" #define ZPOOL_CONFIG_DEVID "devid" #define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" #define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" #define ZPOOL_CONFIG_ASHIFT "ashift" #define ZPOOL_CONFIG_ASIZE "asize" #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */ #define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */ #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" #define ZPOOL_CONFIG_SPARES "spares" #define ZPOOL_CONFIG_IS_SPARE "is_spare" #define ZPOOL_CONFIG_NPARITY "nparity" #define ZPOOL_CONFIG_HOSTID "hostid" #define ZPOOL_CONFIG_HOSTNAME "hostname" #define ZPOOL_CONFIG_LOADED_TIME "initial_load_time" #define ZPOOL_CONFIG_UNSPARE "unspare" #define ZPOOL_CONFIG_PHYS_PATH "phys_path" #define ZPOOL_CONFIG_IS_LOG "is_log" #define ZPOOL_CONFIG_L2CACHE "l2cache" #define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" #define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" #define ZPOOL_CONFIG_IS_HOLE "is_hole" #define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" #define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" #define ZPOOL_CONFIG_DDT_STATS "ddt_stats" #define ZPOOL_CONFIG_SPLIT "splitcfg" #define ZPOOL_CONFIG_ORIG_GUID "orig_guid" #define ZPOOL_CONFIG_SPLIT_GUID "split_guid" #define ZPOOL_CONFIG_SPLIT_LIST "guid_list" #define ZPOOL_CONFIG_REMOVING "removing" #define ZPOOL_CONFIG_RESILVERING "resilvering" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ #define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */ #define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */ /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such * as offline and degraded. */ #define ZPOOL_CONFIG_OFFLINE "offline" #define ZPOOL_CONFIG_FAULTED "faulted" #define ZPOOL_CONFIG_DEGRADED "degraded" #define ZPOOL_CONFIG_REMOVED "removed" #define ZPOOL_CONFIG_FRU "fru" #define ZPOOL_CONFIG_AUX_STATE "aux_state" /* Rewind policy parameters */ #define ZPOOL_REWIND_POLICY "rewind-policy" #define ZPOOL_REWIND_REQUEST "rewind-request" #define ZPOOL_REWIND_REQUEST_TXG "rewind-request-txg" #define ZPOOL_REWIND_META_THRESH "rewind-meta-thresh" #define ZPOOL_REWIND_DATA_THRESH "rewind-data-thresh" /* Rewind data discovered */ #define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts" #define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors" #define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind" #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" #define VDEV_TYPE_REPLACING "replacing" #define VDEV_TYPE_RAIDZ "raidz" #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" #define VDEV_TYPE_HOLE "hole" #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_LOG "log" #define VDEV_TYPE_L2CACHE "l2cache" /* * This is needed in userland to report the minimum necessary device size. */ #define SPA_MINDEVSIZE (64ULL << 20) /* * The location of the pool configuration repository, shared between kernel and * userland. */ #define ZPOOL_CACHE "/etc/zfs/zpool.cache" /* * vdev states are ordered from least to most healthy. * A vdev that's CANT_OPEN or below is considered unusable. */ typedef enum vdev_state { VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */ VDEV_STATE_CLOSED, /* Not currently open */ VDEV_STATE_OFFLINE, /* Not allowed to open */ VDEV_STATE_REMOVED, /* Explicitly removed from system */ VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */ VDEV_STATE_FAULTED, /* External request to fault device */ VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */ VDEV_STATE_HEALTHY /* Presumed good */ } vdev_state_t; #define VDEV_STATE_ONLINE VDEV_STATE_HEALTHY /* * vdev aux states. When a vdev is in the CANT_OPEN state, the aux field * of the vdev stats structure uses these constants to distinguish why. */ typedef enum vdev_aux { VDEV_AUX_NONE, /* no error */ VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */ VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */ VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */ VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */ VDEV_AUX_TOO_SMALL, /* vdev size is too small */ VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ VDEV_AUX_SPARED, /* hot spare used in another pool */ VDEV_AUX_ERR_EXCEEDED, /* too many errors */ VDEV_AUX_IO_FAILURE, /* experienced I/O failure */ VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */ VDEV_AUX_EXTERNAL, /* external diagnosis */ VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */ } vdev_aux_t; /* * pool state. The following states are written to disk as part of the normal * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE. The remaining * states are software abstractions used at various levels to communicate * pool state. */ typedef enum pool_state { POOL_STATE_ACTIVE = 0, /* In active use */ POOL_STATE_EXPORTED, /* Explicitly exported */ POOL_STATE_DESTROYED, /* Explicitly destroyed */ POOL_STATE_SPARE, /* Reserved for hot spare use */ POOL_STATE_L2CACHE, /* Level 2 ARC device */ POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ POOL_STATE_UNAVAIL, /* Internal libzfs state */ POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ } pool_state_t; /* * Scan Functions. */ typedef enum pool_scan_func { POOL_SCAN_NONE, POOL_SCAN_SCRUB, POOL_SCAN_RESILVER, POOL_SCAN_FUNCS } pool_scan_func_t; /* * ZIO types. Needed to interpret vdev statistics below. */ typedef enum zio_type { ZIO_TYPE_NULL = 0, ZIO_TYPE_READ, ZIO_TYPE_WRITE, ZIO_TYPE_FREE, ZIO_TYPE_CLAIM, ZIO_TYPE_IOCTL, ZIO_TYPES } zio_type_t; /* * Pool statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. */ typedef struct pool_scan_stat { /* values stored on disk */ uint64_t pss_func; /* pool_scan_func_t */ uint64_t pss_state; /* dsl_scan_state_t */ uint64_t pss_start_time; /* scan start time */ uint64_t pss_end_time; /* scan end time */ uint64_t pss_to_examine; /* total bytes to scan */ uint64_t pss_examined; /* total examined bytes */ uint64_t pss_to_process; /* total bytes to process */ uint64_t pss_processed; /* total processed bytes */ uint64_t pss_errors; /* scan errors */ /* values not stored on disk */ uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */ } pool_scan_stat_t; typedef enum dsl_scan_state { DSS_NONE, DSS_SCANNING, DSS_FINISHED, DSS_CANCELED, DSS_NUM_STATES } dsl_scan_state_t; /* * Vdev statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. */ typedef struct vdev_stat { hrtime_t vs_timestamp; /* time since vdev load */ uint64_t vs_state; /* vdev state */ uint64_t vs_aux; /* see vdev_aux_t */ uint64_t vs_alloc; /* space allocated */ uint64_t vs_space; /* total capacity */ uint64_t vs_dspace; /* deflated capacity */ uint64_t vs_rsize; /* replaceable dev size */ uint64_t vs_ops[ZIO_TYPES]; /* operation count */ uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ uint64_t vs_read_errors; /* read errors */ uint64_t vs_write_errors; /* write errors */ uint64_t vs_checksum_errors; /* checksum errors */ uint64_t vs_self_healed; /* self-healed bytes */ uint64_t vs_scan_removing; /* removing? */ uint64_t vs_scan_processed; /* scan processed bytes */ } vdev_stat_t; /* * DDT statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. */ typedef struct ddt_object { uint64_t ddo_count; /* number of elments in ddt */ uint64_t ddo_dspace; /* size of ddt on disk */ uint64_t ddo_mspace; /* size of ddt in-core */ } ddt_object_t; typedef struct ddt_stat { uint64_t dds_blocks; /* blocks */ uint64_t dds_lsize; /* logical size */ uint64_t dds_psize; /* physical size */ uint64_t dds_dsize; /* deflated allocated size */ uint64_t dds_ref_blocks; /* referenced blocks */ uint64_t dds_ref_lsize; /* referenced lsize * refcnt */ uint64_t dds_ref_psize; /* referenced psize * refcnt */ uint64_t dds_ref_dsize; /* referenced dsize * refcnt */ } ddt_stat_t; typedef struct ddt_histogram { ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */ } ddt_histogram_t; #define ZVOL_DRIVER "zvol" #define ZFS_DRIVER "zfs" #define ZFS_DEV "/dev/zfs" /* general zvol path */ #define ZVOL_DIR "/dev" #define ZVOL_MAJOR 230 #define ZVOL_MINOR_BITS 4 #define ZVOL_MINOR_MASK ((1U << ZVOL_MINOR_BITS) - 1) #define ZVOL_MINORS (1 << 4) #define ZVOL_DEV_NAME "zd" #define ZVOL_PROP_NAME "name" #define ZVOL_DEFAULT_BLOCKSIZE 8192 /* * /dev/zfs ioctl numbers. */ #define ZFS_IOC ('Z' << 8) typedef enum zfs_ioc { ZFS_IOC_POOL_CREATE = ZFS_IOC, ZFS_IOC_POOL_DESTROY, ZFS_IOC_POOL_IMPORT, ZFS_IOC_POOL_EXPORT, ZFS_IOC_POOL_CONFIGS, ZFS_IOC_POOL_STATS, ZFS_IOC_POOL_TRYIMPORT, ZFS_IOC_POOL_SCAN, ZFS_IOC_POOL_FREEZE, ZFS_IOC_POOL_UPGRADE, ZFS_IOC_POOL_GET_HISTORY, ZFS_IOC_VDEV_ADD, ZFS_IOC_VDEV_REMOVE, ZFS_IOC_VDEV_SET_STATE, ZFS_IOC_VDEV_ATTACH, ZFS_IOC_VDEV_DETACH, ZFS_IOC_VDEV_SETPATH, ZFS_IOC_VDEV_SETFRU, ZFS_IOC_OBJSET_STATS, ZFS_IOC_OBJSET_ZPLPROPS, ZFS_IOC_DATASET_LIST_NEXT, ZFS_IOC_SNAPSHOT_LIST_NEXT, ZFS_IOC_SET_PROP, ZFS_IOC_CREATE_MINOR, ZFS_IOC_REMOVE_MINOR, ZFS_IOC_CREATE, ZFS_IOC_DESTROY, ZFS_IOC_ROLLBACK, ZFS_IOC_RENAME, ZFS_IOC_RECV, ZFS_IOC_SEND, ZFS_IOC_INJECT_FAULT, ZFS_IOC_CLEAR_FAULT, ZFS_IOC_INJECT_LIST_NEXT, ZFS_IOC_ERROR_LOG, ZFS_IOC_CLEAR, ZFS_IOC_PROMOTE, - ZFS_IOC_DESTROY_SNAPS, + ZFS_IOC_DESTROY_SNAPS_NVL, ZFS_IOC_SNAPSHOT, ZFS_IOC_DSOBJ_TO_DSNAME, ZFS_IOC_OBJ_TO_PATH, ZFS_IOC_POOL_SET_PROPS, ZFS_IOC_POOL_GET_PROPS, ZFS_IOC_SET_FSACL, ZFS_IOC_GET_FSACL, ZFS_IOC_SHARE, ZFS_IOC_INHERIT_PROP, ZFS_IOC_SMB_ACL, ZFS_IOC_USERSPACE_ONE, ZFS_IOC_USERSPACE_MANY, ZFS_IOC_USERSPACE_UPGRADE, ZFS_IOC_HOLD, ZFS_IOC_RELEASE, ZFS_IOC_GET_HOLDS, ZFS_IOC_OBJSET_RECVD_PROPS, ZFS_IOC_VDEV_SPLIT, ZFS_IOC_NEXT_OBJ, ZFS_IOC_DIFF, ZFS_IOC_TMP_SNAPSHOT, ZFS_IOC_OBJ_TO_STATS, - ZFS_IOC_POOL_REGUID, ZFS_IOC_EVENTS_NEXT, ZFS_IOC_EVENTS_CLEAR, + ZFS_IOC_POOL_REGUID, + ZFS_IOC_SPACE_WRITTEN, + ZFS_IOC_SPACE_SNAPS, } zfs_ioc_t; /* * zvol ioctl to get dataset name */ #define BLKZNAME _IOR(0x12,125,char[ZFS_MAXNAMELEN]) /* * Internal SPA load state. Used by FMA diagnosis engine. */ typedef enum { SPA_LOAD_NONE, /* no load in progress */ SPA_LOAD_OPEN, /* normal open */ SPA_LOAD_IMPORT, /* import in progress */ SPA_LOAD_TRYIMPORT, /* tryimport in progress */ SPA_LOAD_RECOVER, /* recovery requested */ SPA_LOAD_ERROR /* load failed */ } spa_load_state_t; /* * Bookmark name values. */ #define ZPOOL_ERR_LIST "error list" #define ZPOOL_ERR_DATASET "dataset" #define ZPOOL_ERR_OBJECT "object" #define HIS_MAX_RECORD_LEN (MAXPATHLEN + MAXPATHLEN + 1) /* * The following are names used in the nvlist describing * the pool's history log. */ #define ZPOOL_HIST_RECORD "history record" #define ZPOOL_HIST_TIME "history time" #define ZPOOL_HIST_CMD "history command" #define ZPOOL_HIST_WHO "history who" #define ZPOOL_HIST_ZONE "history zone" #define ZPOOL_HIST_HOST "history hostname" #define ZPOOL_HIST_TXG "history txg" #define ZPOOL_HIST_INT_EVENT "history internal event" #define ZPOOL_HIST_INT_STR "history internal str" /* * Flags for ZFS_IOC_VDEV_SET_STATE */ #define ZFS_ONLINE_CHECKREMOVE 0x1 #define ZFS_ONLINE_UNSPARE 0x2 #define ZFS_ONLINE_FORCEFAULT 0x4 #define ZFS_ONLINE_EXPAND 0x8 #define ZFS_OFFLINE_TEMPORARY 0x1 /* * Flags for ZFS_IOC_POOL_IMPORT */ #define ZFS_IMPORT_NORMAL 0x0 #define ZFS_IMPORT_VERBATIM 0x1 #define ZFS_IMPORT_ANY_HOST 0x2 #define ZFS_IMPORT_MISSING_LOG 0x4 #define ZFS_IMPORT_ONLY 0x8 /* * Sysevent payload members. ZFS will generate the following sysevents with the * given payloads: * * ESC_ZFS_RESILVER_START * ESC_ZFS_RESILVER_END * ESC_ZFS_POOL_DESTROY * ESC_ZFS_POOL_REGUID * * ZFS_EV_POOL_NAME DATA_TYPE_STRING * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 * * ESC_ZFS_VDEV_REMOVE * ESC_ZFS_VDEV_CLEAR * ESC_ZFS_VDEV_CHECK * * ZFS_EV_POOL_NAME DATA_TYPE_STRING * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 * ZFS_EV_VDEV_PATH DATA_TYPE_STRING (optional) * ZFS_EV_VDEV_GUID DATA_TYPE_UINT64 */ #define ZFS_EV_POOL_NAME "pool_name" #define ZFS_EV_POOL_GUID "pool_guid" #define ZFS_EV_VDEV_PATH "vdev_path" #define ZFS_EV_VDEV_GUID "vdev_guid" /* * Note: This is encoded on-disk, so new events must be added to the * end, and unused events can not be removed. Be sure to edit * libzfs_pool.c: hist_event_table[]. */ typedef enum history_internal_events { LOG_NO_EVENT = 0, LOG_POOL_CREATE, LOG_POOL_VDEV_ADD, LOG_POOL_REMOVE, LOG_POOL_DESTROY, LOG_POOL_EXPORT, LOG_POOL_IMPORT, LOG_POOL_VDEV_ATTACH, LOG_POOL_VDEV_REPLACE, LOG_POOL_VDEV_DETACH, LOG_POOL_VDEV_ONLINE, LOG_POOL_VDEV_OFFLINE, LOG_POOL_UPGRADE, LOG_POOL_CLEAR, LOG_POOL_SCAN, LOG_POOL_PROPSET, LOG_DS_CREATE, LOG_DS_CLONE, LOG_DS_DESTROY, LOG_DS_DESTROY_BEGIN, LOG_DS_INHERIT, LOG_DS_PROPSET, LOG_DS_QUOTA, LOG_DS_PERM_UPDATE, LOG_DS_PERM_REMOVE, LOG_DS_PERM_WHO_REMOVE, LOG_DS_PROMOTE, LOG_DS_RECEIVE, LOG_DS_RENAME, LOG_DS_RESERVATION, LOG_DS_REPLAY_INC_SYNC, LOG_DS_REPLAY_FULL_SYNC, LOG_DS_ROLLBACK, LOG_DS_SNAPSHOT, LOG_DS_UPGRADE, LOG_DS_REFQUOTA, LOG_DS_REFRESERV, LOG_POOL_SCAN_DONE, LOG_DS_USER_HOLD, LOG_DS_USER_RELEASE, LOG_POOL_SPLIT, LOG_END } history_internal_events_t; #ifdef __cplusplus } #endif #endif /* _SYS_FS_ZFS_H */ diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am index 4e3a4857330b..524efaa601f7 100644 --- a/lib/libzfs/Makefile.am +++ b/lib/libzfs/Makefile.am @@ -1,28 +1,29 @@ include $(top_srcdir)/config/Rules.am DEFAULT_INCLUDES += \ -I$(top_srcdir)/include \ -I$(top_srcdir)/lib/libspl/include lib_LTLIBRARIES = libzfs.la libzfs_la_SOURCES = \ $(top_srcdir)/lib/libzfs/libzfs_changelist.c \ $(top_srcdir)/lib/libzfs/libzfs_config.c \ $(top_srcdir)/lib/libzfs/libzfs_dataset.c \ $(top_srcdir)/lib/libzfs/libzfs_diff.c \ $(top_srcdir)/lib/libzfs/libzfs_fru.c \ $(top_srcdir)/lib/libzfs/libzfs_graph.c \ $(top_srcdir)/lib/libzfs/libzfs_import.c \ + $(top_srcdir)/lib/libzfs/libzfs_iter.c \ $(top_srcdir)/lib/libzfs/libzfs_mount.c \ $(top_srcdir)/lib/libzfs/libzfs_pool.c \ $(top_srcdir)/lib/libzfs/libzfs_sendrecv.c \ $(top_srcdir)/lib/libzfs/libzfs_status.c \ $(top_srcdir)/lib/libzfs/libzfs_util.c libzfs_la_LIBADD = \ $(top_builddir)/lib/libshare/libshare.la \ $(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libzpool/libzpool.la libzfs_la_LDFLAGS = -lm -ldl -version-info 1:1:0 $(LIBSELINUX) diff --git a/lib/libzfs/Makefile.in b/lib/libzfs/Makefile.in index e05a8a4fe3a8..c2c1d2e4a240 100644 --- a/lib/libzfs/Makefile.in +++ b/lib/libzfs/Makefile.in @@ -1,803 +1,813 @@ # Makefile.in generated by automake 1.11.1 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, # Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ $(top_srcdir)/config/Rules.am subdir = lib/libzfs ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = \ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ $(top_srcdir)/config/kernel-automount.m4 \ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ $(top_srcdir)/config/kernel-bdi.m4 \ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ $(top_srcdir)/config/kernel-bio-failfast.m4 \ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ $(top_srcdir)/config/kernel-blk-end-request.m4 \ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ $(top_srcdir)/config/kernel-blkdev-get.m4 \ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ $(top_srcdir)/config/kernel-clear-inode.m4 \ $(top_srcdir)/config/kernel-create-umode-t.m4 \ $(top_srcdir)/config/kernel-d-make-root.m4 \ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ $(top_srcdir)/config/kernel-evict-inode.m4 \ $(top_srcdir)/config/kernel-fallocate.m4 \ $(top_srcdir)/config/kernel-fmode-t.m4 \ $(top_srcdir)/config/kernel-fsync.m4 \ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ $(top_srcdir)/config/kernel-get-gendisk.m4 \ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ $(top_srcdir)/config/kernel-mount-nodev.m4 \ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ $(top_srcdir)/config/kernel-security-inode-init.m4 \ $(top_srcdir)/config/kernel-set-nlink.m4 \ $(top_srcdir)/config/kernel-show-options.m4 \ $(top_srcdir)/config/kernel-shrink.m4 \ $(top_srcdir)/config/kernel-truncate-range.m4 \ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ $(top_srcdir)/config/kernel-xattr-handler.m4 \ $(top_srcdir)/config/kernel.m4 \ $(top_srcdir)/config/user-arch.m4 \ $(top_srcdir)/config/user-frame-larger-than.m4 \ $(top_srcdir)/config/user-ioctl.m4 \ $(top_srcdir)/config/user-libblkid.m4 \ $(top_srcdir)/config/user-libuuid.m4 \ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ $(top_srcdir)/config/user-selinux.m4 \ $(top_srcdir)/config/user-udev.m4 \ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ $(top_srcdir)/config/zfs-build.m4 \ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/zfs_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) libzfs_la_DEPENDENCIES = $(top_builddir)/lib/libshare/libshare.la \ $(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libzpool/libzpool.la am_libzfs_la_OBJECTS = libzfs_changelist.lo libzfs_config.lo \ libzfs_dataset.lo libzfs_diff.lo libzfs_fru.lo libzfs_graph.lo \ - libzfs_import.lo libzfs_mount.lo libzfs_pool.lo \ + libzfs_import.lo libzfs_iter.lo libzfs_mount.lo libzfs_pool.lo \ libzfs_sendrecv.lo libzfs_status.lo libzfs_util.lo libzfs_la_OBJECTS = $(am_libzfs_la_OBJECTS) AM_V_lt = $(am__v_lt_$(V)) am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY)) am__v_lt_0 = --silent libzfs_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libzfs_la_LDFLAGS) $(LDFLAGS) -o $@ depcomp = $(SHELL) $(top_srcdir)/config/depcomp am__depfiles_maybe = depfiles am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_$(V)) am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY)) am__v_CC_0 = @echo " CC " $@; AM_V_at = $(am__v_at_$(V)) am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) am__v_at_0 = @ CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_$(V)) am__v_CCLD_ = $(am__v_CCLD_$(AM_DEFAULT_VERBOSITY)) am__v_CCLD_0 = @echo " CCLD " $@; AM_V_GEN = $(am__v_GEN_$(V)) am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) am__v_GEN_0 = @echo " GEN " $@; SOURCES = $(libzfs_la_SOURCES) DIST_SOURCES = $(libzfs_la_SOURCES) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALIEN = @ALIEN@ ALIEN_VERSION = @ALIEN_VERSION@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEBUG_CFLAGS = @DEBUG_CFLAGS@ DEBUG_DMU_TX = @DEBUG_DMU_TX@ DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@ DEBUG_ZFS = @DEBUG_ZFS@ DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@ DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@ DEFAULT_PACKAGE = @DEFAULT_PACKAGE@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DPKG = @DPKG@ DPKGBUILD = @DPKGBUILD@ DPKGBUILD_VERSION = @DPKGBUILD_VERSION@ DPKG_VERSION = @DPKG_VERSION@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ FRAME_LARGER_THAN = @FRAME_LARGER_THAN@ GREP = @GREP@ HAVE_ALIEN = @HAVE_ALIEN@ HAVE_DPKG = @HAVE_DPKG@ HAVE_DPKGBUILD = @HAVE_DPKGBUILD@ HAVE_MAKEPKG = @HAVE_MAKEPKG@ HAVE_PACMAN = @HAVE_PACMAN@ HAVE_RPM = @HAVE_RPM@ HAVE_RPMBUILD = @HAVE_RPMBUILD@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ KERNELCPPFLAGS = @KERNELCPPFLAGS@ KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBBLKID = @LIBBLKID@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSELINUX = @LIBSELINUX@ LIBTOOL = @LIBTOOL@ LIBUUID = @LIBUUID@ LINUX = @LINUX@ LINUX_OBJ = @LINUX_OBJ@ LINUX_SYMBOLS = @LINUX_SYMBOLS@ LINUX_VERSION = @LINUX_VERSION@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MAKEPKG = @MAKEPKG@ MAKEPKG_VERSION = @MAKEPKG_VERSION@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_VERSION = @PACKAGE_VERSION@ PACMAN = @PACMAN@ PACMAN_VERSION = @PACMAN_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ RANLIB = @RANLIB@ RPM = @RPM@ RPMBUILD = @RPMBUILD@ RPMBUILD_VERSION = @RPMBUILD_VERSION@ RPM_VERSION = @RPM_VERSION@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SPL = @SPL@ SPL_OBJ = @SPL_OBJ@ SPL_SYMBOLS = @SPL_SYMBOLS@ SPL_VERSION = @SPL_VERSION@ STRIP = @STRIP@ TARGET_ASM_DIR = @TARGET_ASM_DIR@ VENDOR = @VENDOR@ VERSION = @VERSION@ ZFS_CONFIG = @ZFS_CONFIG@ ZFS_META_ALIAS = @ZFS_META_ALIAS@ ZFS_META_AUTHOR = @ZFS_META_AUTHOR@ ZFS_META_DATA = @ZFS_META_DATA@ ZFS_META_LICENSE = @ZFS_META_LICENSE@ ZFS_META_LT_AGE = @ZFS_META_LT_AGE@ ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@ ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@ ZFS_META_NAME = @ZFS_META_NAME@ ZFS_META_RELEASE = @ZFS_META_RELEASE@ ZFS_META_VERSION = @ZFS_META_VERSION@ ZLIB = @ZLIB@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ udevdir = @udevdir@ udevruledir = @udevruledir@ DEFAULT_INCLUDES = -include ${top_builddir}/zfs_config.h \ -I$(top_srcdir)/include -I$(top_srcdir)/lib/libspl/include AM_LIBTOOLFLAGS = --silent AM_CFLAGS = -Wall -Wstrict-prototypes -fno-strict-aliasing \ ${NO_UNUSED_BUT_SET_VARIABLE} ${DEBUG_CFLAGS} -D_GNU_SOURCE \ -D__EXTENSIONS__ -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS \ -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE \ -DTEXT_DOMAIN=\"zfs-linux-user\" lib_LTLIBRARIES = libzfs.la libzfs_la_SOURCES = \ $(top_srcdir)/lib/libzfs/libzfs_changelist.c \ $(top_srcdir)/lib/libzfs/libzfs_config.c \ $(top_srcdir)/lib/libzfs/libzfs_dataset.c \ $(top_srcdir)/lib/libzfs/libzfs_diff.c \ $(top_srcdir)/lib/libzfs/libzfs_fru.c \ $(top_srcdir)/lib/libzfs/libzfs_graph.c \ $(top_srcdir)/lib/libzfs/libzfs_import.c \ + $(top_srcdir)/lib/libzfs/libzfs_iter.c \ $(top_srcdir)/lib/libzfs/libzfs_mount.c \ $(top_srcdir)/lib/libzfs/libzfs_pool.c \ $(top_srcdir)/lib/libzfs/libzfs_sendrecv.c \ $(top_srcdir)/lib/libzfs/libzfs_status.c \ $(top_srcdir)/lib/libzfs/libzfs_util.c libzfs_la_LIBADD = \ $(top_builddir)/lib/libshare/libshare.la \ $(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libzpool/libzpool.la libzfs_la_LDFLAGS = -lm -ldl -version-info 1:1:0 $(LIBSELINUX) all: all-am .SUFFIXES: .SUFFIXES: .c .lo .o .obj $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/Rules.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu lib/libzfs/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu lib/libzfs/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ test "$$dir" != "$$p" || dir=.; \ echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done libzfs.la: $(libzfs_la_OBJECTS) $(libzfs_la_DEPENDENCIES) $(AM_V_CCLD)$(libzfs_la_LINK) -rpath $(libdir) $(libzfs_la_OBJECTS) $(libzfs_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_changelist.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_config.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_dataset.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_diff.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_fru.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_graph.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_import.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_iter.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_mount.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_pool.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_sendrecv.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_status.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzfs_util.Plo@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< libzfs_changelist.lo: $(top_srcdir)/lib/libzfs/libzfs_changelist.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_changelist.lo -MD -MP -MF $(DEPDIR)/libzfs_changelist.Tpo -c -o libzfs_changelist.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_changelist.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_changelist.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_changelist.Tpo $(DEPDIR)/libzfs_changelist.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_changelist.c' object='libzfs_changelist.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_changelist.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_changelist.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_changelist.c libzfs_config.lo: $(top_srcdir)/lib/libzfs/libzfs_config.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_config.lo -MD -MP -MF $(DEPDIR)/libzfs_config.Tpo -c -o libzfs_config.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_config.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_config.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_config.Tpo $(DEPDIR)/libzfs_config.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_config.c' object='libzfs_config.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_config.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_config.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_config.c libzfs_dataset.lo: $(top_srcdir)/lib/libzfs/libzfs_dataset.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_dataset.lo -MD -MP -MF $(DEPDIR)/libzfs_dataset.Tpo -c -o libzfs_dataset.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_dataset.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_dataset.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_dataset.Tpo $(DEPDIR)/libzfs_dataset.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_dataset.c' object='libzfs_dataset.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_dataset.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_dataset.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_dataset.c libzfs_diff.lo: $(top_srcdir)/lib/libzfs/libzfs_diff.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_diff.lo -MD -MP -MF $(DEPDIR)/libzfs_diff.Tpo -c -o libzfs_diff.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_diff.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_diff.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_diff.Tpo $(DEPDIR)/libzfs_diff.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_diff.c' object='libzfs_diff.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_diff.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_diff.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_diff.c libzfs_fru.lo: $(top_srcdir)/lib/libzfs/libzfs_fru.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_fru.lo -MD -MP -MF $(DEPDIR)/libzfs_fru.Tpo -c -o libzfs_fru.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_fru.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_fru.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_fru.Tpo $(DEPDIR)/libzfs_fru.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_fru.c' object='libzfs_fru.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_fru.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_fru.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_fru.c libzfs_graph.lo: $(top_srcdir)/lib/libzfs/libzfs_graph.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_graph.lo -MD -MP -MF $(DEPDIR)/libzfs_graph.Tpo -c -o libzfs_graph.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_graph.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_graph.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_graph.Tpo $(DEPDIR)/libzfs_graph.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_graph.c' object='libzfs_graph.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_graph.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_graph.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_graph.c libzfs_import.lo: $(top_srcdir)/lib/libzfs/libzfs_import.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_import.lo -MD -MP -MF $(DEPDIR)/libzfs_import.Tpo -c -o libzfs_import.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_import.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_import.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_import.Tpo $(DEPDIR)/libzfs_import.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_import.c' object='libzfs_import.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_import.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_import.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_import.c +libzfs_iter.lo: $(top_srcdir)/lib/libzfs/libzfs_iter.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_iter.lo -MD -MP -MF $(DEPDIR)/libzfs_iter.Tpo -c -o libzfs_iter.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_iter.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_iter.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_iter.Tpo $(DEPDIR)/libzfs_iter.Plo +@am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_iter.c' object='libzfs_iter.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_iter.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_iter.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_iter.c + libzfs_mount.lo: $(top_srcdir)/lib/libzfs/libzfs_mount.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_mount.lo -MD -MP -MF $(DEPDIR)/libzfs_mount.Tpo -c -o libzfs_mount.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_mount.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_mount.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_mount.Tpo $(DEPDIR)/libzfs_mount.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_mount.c' object='libzfs_mount.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_mount.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_mount.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_mount.c libzfs_pool.lo: $(top_srcdir)/lib/libzfs/libzfs_pool.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_pool.lo -MD -MP -MF $(DEPDIR)/libzfs_pool.Tpo -c -o libzfs_pool.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_pool.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_pool.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_pool.Tpo $(DEPDIR)/libzfs_pool.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_pool.c' object='libzfs_pool.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_pool.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_pool.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_pool.c libzfs_sendrecv.lo: $(top_srcdir)/lib/libzfs/libzfs_sendrecv.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_sendrecv.lo -MD -MP -MF $(DEPDIR)/libzfs_sendrecv.Tpo -c -o libzfs_sendrecv.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_sendrecv.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_sendrecv.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_sendrecv.Tpo $(DEPDIR)/libzfs_sendrecv.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_sendrecv.c' object='libzfs_sendrecv.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_sendrecv.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_sendrecv.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_sendrecv.c libzfs_status.lo: $(top_srcdir)/lib/libzfs/libzfs_status.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_status.lo -MD -MP -MF $(DEPDIR)/libzfs_status.Tpo -c -o libzfs_status.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_status.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_status.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_status.Tpo $(DEPDIR)/libzfs_status.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_status.c' object='libzfs_status.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_status.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_status.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_status.c libzfs_util.lo: $(top_srcdir)/lib/libzfs/libzfs_util.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libzfs_util.lo -MD -MP -MF $(DEPDIR)/libzfs_util.Tpo -c -o libzfs_util.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_util.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_util.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzfs_util.Tpo $(DEPDIR)/libzfs_util.Plo @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/lib/libzfs/libzfs_util.c' object='libzfs_util.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libzfs_util.lo `test -f '$(top_srcdir)/lib/libzfs/libzfs_util.c' || echo '$(srcdir)/'`$(top_srcdir)/lib/libzfs/libzfs_util.c mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(LTLIBRARIES) installdirs: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ `test -z '$(STRIP)' || \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-libLTLIBRARIES install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -rf ./$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-libLTLIBRARIES .MAKE: install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool ctags distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am \ install-libLTLIBRARIES install-man install-pdf install-pdf-am \ install-ps install-ps-am install-strip installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-libLTLIBRARIES # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index c51b99d53cd7..476598cb2652 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -1,4577 +1,4668 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_IDMAP #include #include #include #endif /* HAVE_IDMAP */ #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "libzfs_impl.h" #include "zfs_deleg.h" static int zvol_create_link_common(libzfs_handle_t *, const char *, int); static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp); /* * Given a single type (not a mask of types), return the type in a human * readable form. */ const char * zfs_type_to_name(zfs_type_t type) { switch (type) { case ZFS_TYPE_FILESYSTEM: return (dgettext(TEXT_DOMAIN, "filesystem")); case ZFS_TYPE_SNAPSHOT: return (dgettext(TEXT_DOMAIN, "snapshot")); case ZFS_TYPE_VOLUME: return (dgettext(TEXT_DOMAIN, "volume")); default: break; } return (NULL); } /* * Validate a ZFS path. This is used even before trying to open the dataset, to * provide a more meaningful error message. We call zfs_error_aux() to * explain exactly why the name was not valid. */ int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, boolean_t modifying) { namecheck_err_t why; char what; (void) zfs_prop_get_table(); if (dataset_namecheck(path, &why, &what) != 0) { if (hdl != NULL) { switch (why) { case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is too long")); break; case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "leading slash in name")); break; case NAME_ERR_EMPTY_COMPONENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "empty component in name")); break; case NAME_ERR_TRAILING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "trailing slash in name")); break; case NAME_ERR_INVALCHAR: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character " "'%c' in name"), what); break; case NAME_ERR_MULTIPLE_AT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "multiple '@' delimiters in name")); break; case NAME_ERR_NOLETTER: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool doesn't begin with a letter")); break; case NAME_ERR_RESERVED: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); break; case NAME_ERR_DISKLIKE: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "reserved disk name")); break; default: break; } } return (0); } if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshot delimiter '@' in filesystem name")); return (0); } if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing '@' delimiter in snapshot name")); return (0); } if (modifying && strchr(path, '%') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character %c in name"), '%'); return (0); } return (-1); } int zfs_name_valid(const char *name, zfs_type_t type) { if (type == ZFS_TYPE_POOL) return (zpool_name_valid(NULL, B_FALSE, name)); return (zfs_validate_name(NULL, name, type, B_FALSE)); } /* * This function takes the raw DSL properties, and filters out the user-defined * properties into a separate nvlist. */ static nvlist_t * process_user_props(zfs_handle_t *zhp, nvlist_t *props) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvpair_t *elem; nvlist_t *propval; nvlist_t *nvl; if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { if (!zfs_prop_user(nvpair_name(elem))) continue; verify(nvpair_value_nvlist(elem, &propval) == 0); if (nvlist_add_nvlist(nvl, nvpair_name(elem), propval) != 0) { nvlist_free(nvl); (void) no_memory(hdl); return (NULL); } } return (nvl); } static zpool_handle_t * zpool_add_handle(zfs_handle_t *zhp, const char *pool_name) { libzfs_handle_t *hdl = zhp->zfs_hdl; zpool_handle_t *zph; if ((zph = zpool_open_canfail(hdl, pool_name)) != NULL) { if (hdl->libzfs_pool_handles != NULL) zph->zpool_next = hdl->libzfs_pool_handles; hdl->libzfs_pool_handles = zph; } return (zph); } static zpool_handle_t * zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len) { libzfs_handle_t *hdl = zhp->zfs_hdl; zpool_handle_t *zph = hdl->libzfs_pool_handles; while ((zph != NULL) && (strncmp(pool_name, zpool_get_name(zph), len) != 0)) zph = zph->zpool_next; return (zph); } /* * Returns a handle to the pool that contains the provided dataset. * If a handle to that pool already exists then that handle is returned. * Otherwise, a new handle is created and added to the list of handles. */ static zpool_handle_t * zpool_handle(zfs_handle_t *zhp) { char *pool_name; int len; zpool_handle_t *zph; len = strcspn(zhp->zfs_name, "/@") + 1; pool_name = zfs_alloc(zhp->zfs_hdl, len); (void) strlcpy(pool_name, zhp->zfs_name, len); zph = zpool_find_handle(zhp, pool_name, len); if (zph == NULL) zph = zpool_add_handle(zhp, pool_name); free(pool_name); return (zph); } void zpool_free_handles(libzfs_handle_t *hdl) { zpool_handle_t *next, *zph = hdl->libzfs_pool_handles; while (zph != NULL) { next = zph->zpool_next; zpool_close(zph); zph = next; } hdl->libzfs_pool_handles = NULL; } /* * Utility function to gather stats (objset and zpl) for the given object. */ static int get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc) { libzfs_handle_t *hdl = zhp->zfs_hdl; (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, zc) != 0) { if (errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, zc) != 0) { return (-1); } } else { return (-1); } } return (0); } /* * Utility function to get the received properties of the given object. */ static int get_recvd_props_ioctl(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *recvdprops; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int err; if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) return (-1); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) { if (errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { return (-1); } } else { zcmd_free_nvlists(&zc); return (-1); } } err = zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &recvdprops); zcmd_free_nvlists(&zc); if (err != 0) return (-1); nvlist_free(zhp->zfs_recvd_props); zhp->zfs_recvd_props = recvdprops; return (0); } static int put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc) { nvlist_t *allprops, *userprops; zhp->zfs_dmustats = zc->zc_objset_stats; /* structure assignment */ if (zcmd_read_dst_nvlist(zhp->zfs_hdl, zc, &allprops) != 0) { return (-1); } /* * XXX Why do we store the user props separately, in addition to * storing them in zfs_props? */ if ((userprops = process_user_props(zhp, allprops)) == NULL) { nvlist_free(allprops); return (-1); } nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); zhp->zfs_props = allprops; zhp->zfs_user_props = userprops; return (0); } static int get_stats(zfs_handle_t *zhp) { int rc = 0; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) return (-1); if (get_stats_ioctl(zhp, &zc) != 0) rc = -1; else if (put_stats_zhdl(zhp, &zc) != 0) rc = -1; zcmd_free_nvlists(&zc); return (rc); } /* * Refresh the properties currently stored in the handle. */ void zfs_refresh_properties(zfs_handle_t *zhp) { (void) get_stats(zhp); } /* * Makes a handle from the given dataset name. Used by zfs_open() and * zfs_iter_* to create child handles on the fly. */ static int make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) { if (put_stats_zhdl(zhp, zc) != 0) return (-1); /* * We've managed to open the dataset and gather statistics. Determine * the high-level type. */ if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) zhp->zfs_head_type = ZFS_TYPE_VOLUME; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM; else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER) return (-1); /* zpios' and other testing datasets are of this type, ignore if encountered */ else abort(); if (zhp->zfs_dmustats.dds_is_snapshot) zhp->zfs_type = ZFS_TYPE_SNAPSHOT; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) zhp->zfs_type = ZFS_TYPE_VOLUME; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_type = ZFS_TYPE_FILESYSTEM; else abort(); /* we should never see any other types */ if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) return (-1); return (0); } zfs_handle_t * make_dataset_handle(libzfs_handle_t *hdl, const char *path) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); if (zhp == NULL) return (NULL); zhp->zfs_hdl = hdl; (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) { free(zhp); return (NULL); } if (get_stats_ioctl(zhp, &zc) == -1) { zcmd_free_nvlists(&zc); free(zhp); return (NULL); } if (make_dataset_handle_common(zhp, &zc) == -1) { free(zhp); zhp = NULL; } zcmd_free_nvlists(&zc); return (zhp); } -static zfs_handle_t * +zfs_handle_t * make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); if (zhp == NULL) return (NULL); zhp->zfs_hdl = hdl; (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); if (make_dataset_handle_common(zhp, zc) == -1) { free(zhp); return (NULL); } return (zhp); } -static zfs_handle_t * +zfs_handle_t * make_dataset_simple_handle_zc(zfs_handle_t *pzhp, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); if (zhp == NULL) return (NULL); zhp->zfs_hdl = pzhp->zfs_hdl; (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); zhp->zfs_head_type = pzhp->zfs_type; zhp->zfs_type = ZFS_TYPE_SNAPSHOT; zhp->zpool_hdl = zpool_handle(zhp); return (zhp); } +zfs_handle_t * +zfs_handle_dup(zfs_handle_t *zhp_orig) +{ + zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); + + if (zhp == NULL) + return (NULL); + + zhp->zfs_hdl = zhp_orig->zfs_hdl; + zhp->zpool_hdl = zhp_orig->zpool_hdl; + (void) strlcpy(zhp->zfs_name, zhp_orig->zfs_name, + sizeof (zhp->zfs_name)); + zhp->zfs_type = zhp_orig->zfs_type; + zhp->zfs_head_type = zhp_orig->zfs_head_type; + zhp->zfs_dmustats = zhp_orig->zfs_dmustats; + if (zhp_orig->zfs_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_props, &zhp->zfs_props, 0) != 0) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + if (zhp_orig->zfs_user_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_user_props, + &zhp->zfs_user_props, 0) != 0) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + if (zhp_orig->zfs_recvd_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_recvd_props, + &zhp->zfs_recvd_props, 0)) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + zhp->zfs_mntcheck = zhp_orig->zfs_mntcheck; + if (zhp_orig->zfs_mntopts != NULL) { + zhp->zfs_mntopts = zfs_strdup(zhp_orig->zfs_hdl, + zhp_orig->zfs_mntopts); + } + zhp->zfs_props_table = zhp_orig->zfs_props_table; + return (zhp); +} + /* * Opens the given snapshot, filesystem, or volume. The 'types' * argument is a mask of acceptable types. The function will print an * appropriate error message and return NULL if it can't be opened. */ zfs_handle_t * zfs_open(libzfs_handle_t *hdl, const char *path, int types) { zfs_handle_t *zhp; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); /* * Validate the name before we even try to open it. */ if (!zfs_validate_name(hdl, path, ZFS_TYPE_DATASET, B_FALSE)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid dataset name")); (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); return (NULL); } /* * Try to get stats for the dataset, which will tell us if it exists. */ errno = 0; if ((zhp = make_dataset_handle(hdl, path)) == NULL) { (void) zfs_standard_error(hdl, errno, errbuf); return (NULL); } if (!(types & zhp->zfs_type)) { (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (NULL); } return (zhp); } /* * Release a ZFS handle. Nothing to do but free the associated memory. */ void zfs_close(zfs_handle_t *zhp) { if (zhp->zfs_mntopts) free(zhp->zfs_mntopts); nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); nvlist_free(zhp->zfs_recvd_props); free(zhp); } typedef struct mnttab_node { struct mnttab mtn_mt; avl_node_t mtn_node; } mnttab_node_t; static int libzfs_mnttab_cache_compare(const void *arg1, const void *arg2) { const mnttab_node_t *mtn1 = arg1; const mnttab_node_t *mtn2 = arg2; int rv; rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special); if (rv == 0) return (0); return (rv > 0 ? 1 : -1); } void libzfs_mnttab_init(libzfs_handle_t *hdl) { assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); } void libzfs_mnttab_update(libzfs_handle_t *hdl) { struct mnttab entry; rewind(hdl->libzfs_mnttab); while (getmntent(hdl->libzfs_mnttab, &entry) == 0) { mnttab_node_t *mtn; if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, entry.mnt_special); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, entry.mnt_mountp); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, entry.mnt_fstype); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); avl_add(&hdl->libzfs_mnttab_cache, mtn); } } void libzfs_mnttab_fini(libzfs_handle_t *hdl) { void *cookie = NULL; mnttab_node_t *mtn; while ((mtn = avl_destroy_nodes(&hdl->libzfs_mnttab_cache, &cookie))) { free(mtn->mtn_mt.mnt_special); free(mtn->mtn_mt.mnt_mountp); free(mtn->mtn_mt.mnt_fstype); free(mtn->mtn_mt.mnt_mntopts); free(mtn); } avl_destroy(&hdl->libzfs_mnttab_cache); } void libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable) { hdl->libzfs_mnttab_enable = enable; } int libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, struct mnttab *entry) { mnttab_node_t find; mnttab_node_t *mtn; if (!hdl->libzfs_mnttab_enable) { struct mnttab srch = { 0 }; if (avl_numnodes(&hdl->libzfs_mnttab_cache)) libzfs_mnttab_fini(hdl); rewind(hdl->libzfs_mnttab); srch.mnt_special = (char *)fsname; srch.mnt_fstype = MNTTYPE_ZFS; if (getmntany(hdl->libzfs_mnttab, entry, &srch) == 0) return (0); else return (ENOENT); } if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) libzfs_mnttab_update(hdl); find.mtn_mt.mnt_special = (char *)fsname; mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL); if (mtn) { *entry = mtn->mtn_mt; return (0); } return (ENOENT); } void libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, const char *mountp, const char *mntopts) { mnttab_node_t *mtn; if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) return; mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); avl_add(&hdl->libzfs_mnttab_cache, mtn); } void libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) { mnttab_node_t find; mnttab_node_t *ret; find.mtn_mt.mnt_special = (char *)fsname; if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL))) { avl_remove(&hdl->libzfs_mnttab_cache, ret); free(ret->mtn_mt.mnt_special); free(ret->mtn_mt.mnt_mountp); free(ret->mtn_mt.mnt_fstype); free(ret->mtn_mt.mnt_mntopts); free(ret); } } int zfs_spa_version(zfs_handle_t *zhp, int *spa_version) { zpool_handle_t *zpool_handle = zhp->zpool_hdl; if (zpool_handle == NULL) return (-1); *spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); return (0); } /* * The choice of reservation property depends on the SPA version. */ static int zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) { int spa_version; if (zfs_spa_version(zhp, &spa_version) < 0) return (-1); if (spa_version >= SPA_VERSION_REFRESERVATION) *resv_prop = ZFS_PROP_REFRESERVATION; else *resv_prop = ZFS_PROP_RESERVATION; return (0); } /* * Given an nvlist of properties to set, validates that they are correct, and * parses any numeric properties (index, boolean, etc) if they are specified as * strings. */ nvlist_t * zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, const char *errbuf) { nvpair_t *elem; uint64_t intval; char *strval; zfs_prop_t prop; nvlist_t *ret; int chosen_normal = -1; int chosen_utf = -1; if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } /* * Make sure this property is valid and applies to this type. */ elem = NULL; while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { const char *propname = nvpair_name(elem); prop = zfs_name_to_prop(propname); if (prop == ZPROP_INVAL && zfs_prop_user(propname)) { /* * This is a user property: make sure it's a * string, and that it's less than ZAP_MAXNAMELEN. */ if (nvpair_type(elem) != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property name '%s' is too long"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } (void) nvpair_value_string(elem, &strval); if (nvlist_add_string(ret, propname, strval) != 0) { (void) no_memory(hdl); goto error; } continue; } /* * Currently, only user properties can be modified on * snapshots. */ if (type == ZFS_TYPE_SNAPSHOT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "this property can not be modified for snapshots")); (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); goto error; } if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) { zfs_userquota_prop_t uqtype; char newpropname[128]; char domain[128]; uint64_t rid; uint64_t valary[3]; if (userquota_propname_decode(propname, zoned, &uqtype, domain, sizeof (domain), &rid) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' has an invalid user/group name"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (uqtype != ZFS_PROP_USERQUOTA && uqtype != ZFS_PROP_GROUPQUOTA) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (nvpair_type(elem) == DATA_TYPE_STRING) { (void) nvpair_value_string(elem, &strval); if (strcmp(strval, "none") == 0) { intval = 0; } else if (zfs_nicestrtonum(hdl, strval, &intval) != 0) { (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { (void) nvpair_value_uint64(elem, &intval); if (intval == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "use 'none' to disable " "userquota/groupquota")); goto error; } } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a number"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } /* * Encode the prop name as * userquota@-domain, to make it easy * for the kernel to decode. */ (void) snprintf(newpropname, sizeof (newpropname), "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype], (longlong_t)rid, domain); valary[0] = uqtype; valary[1] = rid; valary[2] = intval; if (nvlist_add_uint64_array(ret, newpropname, valary, 3) != 0) { (void) no_memory(hdl); goto error; } continue; + } else if (prop == ZPROP_INVAL && zfs_prop_written(propname)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' is readonly"), + propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); + goto error; } if (prop == ZPROP_INVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (!zfs_prop_valid_for_type(prop, type)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' does not " "apply to datasets of this type"), propname); (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); goto error; } if (zfs_prop_readonly(prop) && (!zfs_prop_setonce(prop) || zhp != NULL)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (zprop_parse_value(hdl, elem, prop, type, ret, &strval, &intval, errbuf) != 0) goto error; /* * Perform some additional checks for specific properties. */ switch (prop) { case ZFS_PROP_VERSION: { int version; if (zhp == NULL) break; version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); if (intval < version) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Can not downgrade; already at version %u"), version); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } case ZFS_PROP_RECORDSIZE: case ZFS_PROP_VOLBLOCKSIZE: /* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */ if (intval < SPA_MINBLOCKSIZE || intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be power of 2 from %u " "to %uk"), propname, (uint_t)SPA_MINBLOCKSIZE, (uint_t)SPA_MAXBLOCKSIZE >> 10); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZFS_PROP_MLSLABEL: { #ifdef HAVE_MLSLABEL /* * Verify the mlslabel string and convert to * internal hex label string. */ m_label_t *new_sl; char *hex = NULL; /* internal label string */ /* Default value is already OK. */ if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) break; /* Verify the label can be converted to binary form */ if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) || (str_to_label(strval, &new_sl, MAC_LABEL, L_NO_CORRECTION, NULL) == -1)) { goto badlabel; } /* Now translate to hex internal label string */ if (label_to_str(new_sl, &hex, M_INTERNAL, DEF_NAMES) != 0) { if (hex) free(hex); goto badlabel; } m_label_free(new_sl); /* If string is already in internal form, we're done. */ if (strcmp(strval, hex) == 0) { free(hex); break; } /* Replace the label string with the internal form. */ (void) nvlist_remove(ret, zfs_prop_to_name(prop), DATA_TYPE_STRING); verify(nvlist_add_string(ret, zfs_prop_to_name(prop), hex) == 0); free(hex); break; badlabel: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid mlslabel '%s'"), strval); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); m_label_free(new_sl); /* OK if null */ goto error; #else zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "mlslabels are unsupported")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; #endif /* HAVE_MLSLABEL */ } case ZFS_PROP_MOUNTPOINT: { namecheck_err_t why; if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 || strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0) break; if (mountpoint_namecheck(strval, &why)) { switch (why) { case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be an absolute path, " "'none', or 'legacy'"), propname); break; case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "component of '%s' is too long"), propname); break; default: break; } (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } /*FALLTHRU*/ case ZFS_PROP_SHARESMB: case ZFS_PROP_SHARENFS: /* * For the mountpoint and sharenfs or sharesmb * properties, check if it can be set in a * global/non-global zone based on * the zoned property value: * * global zone non-global zone * -------------------------------------------------- * zoned=on mountpoint (no) mountpoint (yes) * sharenfs (no) sharenfs (no) * sharesmb (no) sharesmb (no) * * zoned=off mountpoint (yes) N/A * sharenfs (yes) * sharesmb (yes) */ if (zoned) { if (getzoneid() == GLOBAL_ZONEID) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set on " "dataset in a non-global zone"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } else if (prop == ZFS_PROP_SHARENFS || prop == ZFS_PROP_SHARESMB) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set in " "a non-global zone"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } } else if (getzoneid() != GLOBAL_ZONEID) { /* * If zoned property is 'off', this must be in * a global zone. If not, something is wrong. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set while dataset " "'zoned' property is set"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } /* * At this point, it is legitimate to set the * property. Now we want to make sure that the * property value is valid if it is sharenfs. */ if ((prop == ZFS_PROP_SHARENFS || prop == ZFS_PROP_SHARESMB) && strcmp(strval, "on") != 0 && strcmp(strval, "off") != 0) { zfs_share_proto_t proto; if (prop == ZFS_PROP_SHARESMB) proto = PROTO_SMB; else proto = PROTO_NFS; /* * Must be an valid sharing protocol * option string so init the libshare * in order to enable the parser and * then parse the options. We use the * control API since we don't care about * the current configuration and don't * want the overhead of loading it * until we actually do something. */ if (zfs_init_libshare(hdl, SA_INIT_CONTROL_API) != SA_OK) { /* * An error occurred so we can't do * anything */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set: problem " "in share initialization"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (zfs_parse_options(strval, proto) != SA_OK) { /* * There was an error in parsing so * deal with it by issuing an error * message and leaving after * uninitializing the the libshare * interface. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set to invalid " "options"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); zfs_uninit_libshare(hdl); goto error; } zfs_uninit_libshare(hdl); } break; case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; case ZFS_PROP_NORMALIZE: chosen_normal = (int)intval; break; default: break; } /* * For changes to existing volumes, we have some additional * checks to enforce. */ if (type == ZFS_TYPE_VOLUME && zhp != NULL) { uint64_t volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); uint64_t blocksize = zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE); char buf[64]; switch (prop) { case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: if (intval > volsize) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is greater than current " "volume size"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZFS_PROP_VOLSIZE: if (intval % blocksize != 0) { zfs_nicenum(blocksize, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a multiple of " "volume block size (%s)"), propname, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (intval == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be zero"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; default: break; } } } /* * If normalization was chosen, but no UTF8 choice was made, * enforce rejection of non-UTF8 names. * * If normalization was chosen, but rejecting non-UTF8 names * was explicitly not chosen, it is an error. */ if (chosen_normal > 0 && chosen_utf < 0) { if (nvlist_add_uint64(ret, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) { (void) no_memory(hdl); goto error; } } else if (chosen_normal > 0 && chosen_utf == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be set 'on' if normalization chosen"), zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } return (ret); error: nvlist_free(ret); return (NULL); } int zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) { uint64_t old_volsize; uint64_t new_volsize; uint64_t old_reservation; uint64_t new_reservation; zfs_prop_t resv_prop; /* * If this is an existing volume, and someone is setting the volsize, * make sure that it matches the reservation, or add it if necessary. */ old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_reservation = zfs_prop_get_int(zhp, resv_prop); if ((zvol_volsize_to_reservation(old_volsize, zhp->zfs_props) != old_reservation) || nvlist_lookup_uint64(nvl, zfs_prop_to_name(resv_prop), &new_reservation) != ENOENT) { return (0); } if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &new_volsize) != 0) return (-1); new_reservation = zvol_volsize_to_reservation(new_volsize, zhp->zfs_props); if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop), new_reservation) != 0) { (void) no_memory(zhp->zfs_hdl); return (-1); } return (1); } void zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, char *errbuf) { switch (err) { case ENOSPC: /* * For quotas and reservations, ENOSPC indicates * something different; setting a quota or reservation * doesn't use any disk space. */ switch (prop) { case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "size is less than current used or " "reserved space")); (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); break; case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "size is greater than available space")); (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); break; default: (void) zfs_standard_error(hdl, err, errbuf); break; } break; case EBUSY: (void) zfs_standard_error(hdl, EBUSY, errbuf); break; case EROFS: (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf); break; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool and or dataset must be upgraded to set this " "property or value")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case ERANGE: if (prop == ZFS_PROP_COMPRESSION) { (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property setting is not allowed on " "bootable datasets")); (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); } else { (void) zfs_standard_error(hdl, err, errbuf); } break; case EINVAL: if (prop == ZPROP_INVAL) { (void) zfs_error(hdl, EZFS_BADPROP, errbuf); } else { (void) zfs_standard_error(hdl, err, errbuf); } break; case EOVERFLOW: /* * This platform can't address a volume this big. */ #ifdef _ILP32 if (prop == ZFS_PROP_VOLSIZE) { (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf); break; } #endif /* FALLTHROUGH */ default: (void) zfs_standard_error(hdl, err, errbuf); } } static boolean_t zfs_is_namespace_prop(zfs_prop_t prop) { switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_SETUID: case ZFS_PROP_READONLY: case ZFS_PROP_XATTR: case ZFS_PROP_NBMAND: return (B_TRUE); default: return (B_FALSE); } } /* * Given a property name and value, set the property for the given dataset. */ int zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret = -1; prop_changelist_t *cl = NULL; char errbuf[1024]; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *nvl = NULL, *realprops; zfs_prop_t prop; boolean_t do_prefix; uint64_t idx; int added_resv = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), zhp->zfs_name); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 || nvlist_add_string(nvl, propname, propval) != 0) { (void) no_memory(hdl); goto error; } if ((realprops = zfs_valid_proplist(hdl, zhp->zfs_type, nvl, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL) goto error; nvlist_free(nvl); nvl = realprops; prop = zfs_name_to_prop(propname); if (prop == ZFS_PROP_VOLSIZE) { if ((added_resv = zfs_add_synthetic_resv(zhp, nvl)) == -1) goto error; } if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) goto error; if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } /* * If the dataset's canmount property is being set to noauto, * then we want to prevent unmounting & remounting it. */ do_prefix = !((prop == ZFS_PROP_CANMOUNT) && (zprop_string_to_index(prop, propval, &idx, ZFS_TYPE_DATASET) == 0) && (idx == ZFS_CANMOUNT_NOAUTO)); if (do_prefix && (ret = changelist_prefix(cl)) != 0) goto error; /* * Execute the corresponding ioctl() to set this property. */ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) goto error; ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); if (ret != 0) { zfs_setprop_error(hdl, prop, errno, errbuf); if (added_resv && errno == ENOSPC) { /* clean up the volsize property we tried to set */ uint64_t old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); nvlist_free(nvl); zcmd_free_nvlists(&zc); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) goto error; if (nvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), old_volsize) != 0) goto error; if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) goto error; (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); } } else { if (do_prefix) ret = changelist_postfix(cl); if (ret == 0) { /* * Refresh the statistics so the new property * value is reflected. */ (void) get_stats(zhp); /* * Remount the filesystem to propagate the change * if one of the options handled by the generic * Linux namespace layer has been modified. */ if (zfs_is_namespace_prop(prop) && zfs_is_mounted(zhp, NULL)) ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0); } } error: nvlist_free(nvl); zcmd_free_nvlists(&zc); if (cl) changelist_free(cl); return (ret); } /* * Given a property, inherit the value from the parent dataset, or if received * is TRUE, revert to the received value, if any. */ int zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret; prop_changelist_t *cl; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; zfs_prop_t prop; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s'"), propname, zhp->zfs_name); zc.zc_cookie = received; if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) { /* * For user properties, the amount of work we have to do is very * small, so just do it here. */ if (!zfs_prop_user(propname)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0) return (zfs_standard_error(hdl, errno, errbuf)); return (0); } /* * Verify that this property is inheritable. */ if (zfs_prop_readonly(prop)) return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf)); if (!zfs_prop_inheritable(prop) && !received) return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf)); /* * Check to see if the value applies to this type */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) return (zfs_error(hdl, EZFS_PROPTYPE, errbuf)); /* * Normalize the name, to get rid of shorthand abbreviations. */ propname = zfs_prop_to_name(prop); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is used in a non-global zone")); return (zfs_error(hdl, EZFS_ZONED, errbuf)); } /* * Determine datasets which will be affected by this change, if any. */ if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) return (-1); if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } if ((ret = changelist_prefix(cl)) != 0) goto error; if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) { return (zfs_standard_error(hdl, errno, errbuf)); } else { if ((ret = changelist_postfix(cl)) != 0) goto error; /* * Refresh the statistics so the new property is reflected. */ (void) get_stats(zhp); } error: changelist_free(cl); return (ret); } /* * True DSL properties are stored in an nvlist. The following two functions * extract them appropriately. */ uint64_t getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; uint64_t value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); value = zfs_prop_default_numeric(prop); *source = ""; } return (value); } static char * getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; char *value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); if ((value = (char *)zfs_prop_default_string(prop)) == NULL) value = ""; *source = ""; } return (value); } static boolean_t zfs_is_recvd_props_mode(zfs_handle_t *zhp) { return (zhp->zfs_props == zhp->zfs_recvd_props); } static void zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) { *cookie = (uint64_t)(uintptr_t)zhp->zfs_props; zhp->zfs_props = zhp->zfs_recvd_props; } static void zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) { zhp->zfs_props = (nvlist_t *)(uintptr_t)*cookie; *cookie = 0; } /* * Internal function for getting a numeric property. Both zfs_prop_get() and * zfs_prop_get_int() are built using this interface. * * Certain properties can be overridden using 'mount -o'. In this case, scan * the contents of the /etc/mtab entry, searching for the appropriate options. * If they differ from the on-disk values, report the current values and mark * the source "temporary". */ static int get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, char **source, uint64_t *val) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; nvlist_t *zplprops = NULL; struct mnttab mnt; char *mntopt_on = NULL; char *mntopt_off = NULL; boolean_t received = zfs_is_recvd_props_mode(zhp); *source = NULL; switch (prop) { case ZFS_PROP_ATIME: mntopt_on = MNTOPT_ATIME; mntopt_off = MNTOPT_NOATIME; break; case ZFS_PROP_DEVICES: mntopt_on = MNTOPT_DEVICES; mntopt_off = MNTOPT_NODEVICES; break; case ZFS_PROP_EXEC: mntopt_on = MNTOPT_EXEC; mntopt_off = MNTOPT_NOEXEC; break; case ZFS_PROP_READONLY: mntopt_on = MNTOPT_RO; mntopt_off = MNTOPT_RW; break; case ZFS_PROP_SETUID: mntopt_on = MNTOPT_SETUID; mntopt_off = MNTOPT_NOSETUID; break; case ZFS_PROP_XATTR: mntopt_on = MNTOPT_XATTR; mntopt_off = MNTOPT_NOXATTR; break; case ZFS_PROP_NBMAND: mntopt_on = MNTOPT_NBMAND; mntopt_off = MNTOPT_NONBMAND; break; default: break; } /* * Because looking up the mount options is potentially expensive * (iterating over all of /etc/mtab), we defer its calculation until * we're looking up a property which requires its presence. */ if (!zhp->zfs_mntcheck && (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) { libzfs_handle_t *hdl = zhp->zfs_hdl; struct mnttab entry; if (libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0) { zhp->zfs_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); if (zhp->zfs_mntopts == NULL) return (-1); } zhp->zfs_mntcheck = B_TRUE; } if (zhp->zfs_mntopts == NULL) mnt.mnt_mntopts = ""; else mnt.mnt_mntopts = zhp->zfs_mntopts; switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_READONLY: case ZFS_PROP_SETUID: case ZFS_PROP_XATTR: case ZFS_PROP_NBMAND: *val = getprop_uint64(zhp, prop, source); if (received) break; if (hasmntopt(&mnt, mntopt_on) && !*val) { *val = B_TRUE; if (src) *src = ZPROP_SRC_TEMPORARY; } else if (hasmntopt(&mnt, mntopt_off) && *val) { *val = B_FALSE; if (src) *src = ZPROP_SRC_TEMPORARY; } break; case ZFS_PROP_CANMOUNT: case ZFS_PROP_VOLSIZE: case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: *val = getprop_uint64(zhp, prop, source); if (*source == NULL) { /* not default, must be local */ *source = zhp->zfs_name; } break; case ZFS_PROP_MOUNTED: *val = (zhp->zfs_mntopts != NULL); break; case ZFS_PROP_NUMCLONES: *val = zhp->zfs_dmustats.dds_num_clones; break; case ZFS_PROP_VERSION: case ZFS_PROP_NORMALIZE: case ZFS_PROP_UTF8ONLY: case ZFS_PROP_CASE: if (!zfs_prop_valid_for_type(prop, zhp->zfs_head_type) || zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) return (-1); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) { zcmd_free_nvlists(&zc); return (-1); } if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &zplprops) != 0 || nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop), val) != 0) { zcmd_free_nvlists(&zc); return (-1); } if (zplprops) nvlist_free(zplprops); zcmd_free_nvlists(&zc); break; default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: *val = getprop_uint64(zhp, prop, source); /* * If we tried to use a default value for a * readonly property, it means that it was not * present. */ if (zfs_prop_readonly(prop) && *source != NULL && (*source)[0] == '\0') { *source = NULL; } break; case PROP_TYPE_STRING: default: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "cannot get non-numeric property")); return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "internal error"))); } } return (0); } /* * Calculate the source type, given the raw source string. */ static void get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source, char *statbuf, size_t statlen) { if (statbuf == NULL || *srctype == ZPROP_SRC_TEMPORARY) return; if (source == NULL) { *srctype = ZPROP_SRC_NONE; } else if (source[0] == '\0') { *srctype = ZPROP_SRC_DEFAULT; } else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) { *srctype = ZPROP_SRC_RECEIVED; } else { if (strcmp(source, zhp->zfs_name) == 0) { *srctype = ZPROP_SRC_LOCAL; } else { (void) strlcpy(statbuf, source, statlen); *srctype = ZPROP_SRC_INHERITED; } } } int zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf, size_t proplen, boolean_t literal) { zfs_prop_t prop; int err = 0; if (zhp->zfs_recvd_props == NULL) if (get_recvd_props_ioctl(zhp) != 0) return (-1); prop = zfs_name_to_prop(propname); if (prop != ZPROP_INVAL) { uint64_t cookie; if (!nvlist_exists(zhp->zfs_recvd_props, propname)) return (-1); zfs_set_recvd_props_mode(zhp, &cookie); err = zfs_prop_get(zhp, prop, propbuf, proplen, NULL, NULL, 0, literal); zfs_unset_recvd_props_mode(zhp, &cookie); - } else if (zfs_prop_userquota(propname)) { - return (-1); } else { nvlist_t *propval; char *recvdval; if (nvlist_lookup_nvlist(zhp->zfs_recvd_props, propname, &propval) != 0) return (-1); verify(nvlist_lookup_string(propval, ZPROP_VALUE, &recvdval) == 0); (void) strlcpy(propbuf, recvdval, proplen); } return (err == 0 ? 0 : -1); } +static int +get_clones_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) +{ + nvlist_t *value; + nvpair_t *pair; + + value = zfs_get_clones_nvl(zhp); + if (value == NULL) + return (-1); + + propbuf[0] = '\0'; + for (pair = nvlist_next_nvpair(value, NULL); pair != NULL; + pair = nvlist_next_nvpair(value, pair)) { + if (propbuf[0] != '\0') + (void) strlcat(propbuf, ",", proplen); + (void) strlcat(propbuf, nvpair_name(pair), proplen); + } + + return (0); +} + +struct get_clones_arg { + uint64_t numclones; + nvlist_t *value; + const char *origin; + char buf[ZFS_MAXNAMELEN]; +}; + +int +get_clones_cb(zfs_handle_t *zhp, void *arg) +{ + struct get_clones_arg *gca = arg; + + if (gca->numclones == 0) { + zfs_close(zhp); + return (0); + } + + if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, gca->buf, sizeof (gca->buf), + NULL, NULL, 0, B_TRUE) != 0) + goto out; + if (strcmp(gca->buf, gca->origin) == 0) { + if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) { + zfs_close(zhp); + return (no_memory(zhp->zfs_hdl)); + } + gca->numclones--; + } + +out: + (void) zfs_iter_children(zhp, get_clones_cb, gca); + zfs_close(zhp); + return (0); +} + +nvlist_t * +zfs_get_clones_nvl(zfs_handle_t *zhp) +{ + nvlist_t *nv, *value; + + if (nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), &nv) != 0) { + struct get_clones_arg gca; + + /* + * if this is a snapshot, then the kernel wasn't able + * to get the clones. Do it by slowly iterating. + */ + if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) + return (NULL); + if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0) + return (NULL); + if (nvlist_alloc(&value, NV_UNIQUE_NAME, 0) != 0) { + nvlist_free(nv); + return (NULL); + } + + gca.numclones = zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES); + gca.value = value; + gca.origin = zhp->zfs_name; + + if (gca.numclones != 0) { + zfs_handle_t *root; + char pool[ZFS_MAXNAMELEN]; + char *cp = pool; + + /* get the pool name */ + (void) strlcpy(pool, zhp->zfs_name, sizeof (pool)); + (void) strsep(&cp, "/@"); + root = zfs_open(zhp->zfs_hdl, pool, + ZFS_TYPE_FILESYSTEM); + + (void) get_clones_cb(root, &gca); + } + + if (gca.numclones != 0 || + nvlist_add_nvlist(nv, ZPROP_VALUE, value) != 0 || + nvlist_add_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), nv) != 0) { + nvlist_free(nv); + nvlist_free(value); + return (NULL); + } + nvlist_free(nv); + nvlist_free(value); + verify(0 == nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), &nv)); + } + + verify(nvlist_lookup_nvlist(nv, ZPROP_VALUE, &value) == 0); + + return (value); +} + /* * Retrieve a property from the given object. If 'literal' is specified, then * numbers are left as exact values. Otherwise, numbers are converted to a * human-readable form. * * Returns 0 on success, or -1 on error. */ int zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal) { char *source = NULL; uint64_t val; char *str; const char *strval; boolean_t received = zfs_is_recvd_props_mode(zhp); /* * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) return (-1); if (received && zfs_prop_readonly(prop)) return (-1); if (src) *src = ZPROP_SRC_NONE; switch (prop) { case ZFS_PROP_CREATION: /* * 'creation' is a time_t stored in the statistics. We convert * this into a string unless 'literal' is specified. */ { val = getprop_uint64(zhp, prop, &source); time_t time = (time_t)val; struct tm t; if (literal || localtime_r(&time, &t) == NULL || strftime(propbuf, proplen, "%a %b %e %k:%M %Y", &t) == 0) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t) val); } break; case ZFS_PROP_MOUNTPOINT: /* * Getting the precise mountpoint can be tricky. * * - for 'none' or 'legacy', return those values. * - for inherited mountpoints, we want to take everything * after our ancestor and append it to the inherited value. * * If the pool has an alternate root, we want to prepend that * root to any values we return. */ str = getprop_string(zhp, prop, &source); if (str[0] == '/') { char buf[MAXPATHLEN]; char *root = buf; const char *relpath; /* * If we inherit the mountpoint, even from a dataset * with a received value, the source will be the path of * the dataset we inherit from. If source is * ZPROP_SOURCE_VAL_RECVD, the received value is not * inherited. */ if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { relpath = ""; } else { relpath = zhp->zfs_name + strlen(source); if (relpath[0] == '/') relpath++; } if ((zpool_get_prop(zhp->zpool_hdl, ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL)) || (strcmp(root, "-") == 0)) root[0] = '\0'; /* * Special case an alternate root of '/'. This will * avoid having multiple leading slashes in the * mountpoint path. */ if (strcmp(root, "/") == 0) root++; /* * If the mountpoint is '/' then skip over this * if we are obtaining either an alternate root or * an inherited mountpoint. */ if (str[1] == '\0' && (root[0] != '\0' || relpath[0] != '\0')) str++; if (relpath[0] == '\0') (void) snprintf(propbuf, proplen, "%s%s", root, str); else (void) snprintf(propbuf, proplen, "%s%s%s%s", root, str, relpath[0] == '@' ? "" : "/", relpath); } else { /* 'legacy' or 'none' */ (void) strlcpy(propbuf, str, proplen); } break; case ZFS_PROP_ORIGIN: (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); /* * If there is no parent at all, return failure to indicate that * it doesn't apply to this dataset. */ if (propbuf[0] == '\0') return (-1); break; + case ZFS_PROP_CLONES: + if (get_clones_string(zhp, propbuf, proplen) != 0) + return (-1); + break; + case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); /* * If quota or reservation is 0, we translate this into 'none' * (unless literal is set), and indicate that it's the default * value. Otherwise, we print the number nicely and indicate * that its set locally. */ if (val == 0) { if (literal) (void) strlcpy(propbuf, "0", proplen); else (void) strlcpy(propbuf, "none", proplen); } else { if (literal) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); else zfs_nicenum(val, propbuf, proplen); } break; case ZFS_PROP_REFRATIO: case ZFS_PROP_COMPRESSRATIO: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); (void) snprintf(propbuf, proplen, "%llu.%02llux", (u_longlong_t)(val / 100), (u_longlong_t)(val % 100)); break; case ZFS_PROP_TYPE: switch (zhp->zfs_type) { case ZFS_TYPE_FILESYSTEM: str = "filesystem"; break; case ZFS_TYPE_VOLUME: str = "volume"; break; case ZFS_TYPE_SNAPSHOT: str = "snapshot"; break; default: abort(); } (void) snprintf(propbuf, proplen, "%s", str); break; case ZFS_PROP_MOUNTED: /* * The 'mounted' property is a pseudo-property that described * whether the filesystem is currently mounted. Even though * it's a boolean value, the typical values of "on" and "off" * don't make sense, so we translate to "yes" and "no". */ if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, src, &source, &val) != 0) return (-1); if (val) (void) strlcpy(propbuf, "yes", proplen); else (void) strlcpy(propbuf, "no", proplen); break; case ZFS_PROP_NAME: /* * The 'name' property is a pseudo-property derived from the * dataset name. It is presented as a real property to simplify * consumers. */ (void) strlcpy(propbuf, zhp->zfs_name, proplen); break; case ZFS_PROP_MLSLABEL: { #ifdef HAVE_MLSLABEL m_label_t *new_sl = NULL; char *ascii = NULL; /* human readable label */ (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); if (literal || (strcasecmp(propbuf, ZFS_MLSLABEL_DEFAULT) == 0)) break; /* * Try to translate the internal hex string to * human-readable output. If there are any * problems just use the hex string. */ if (str_to_label(propbuf, &new_sl, MAC_LABEL, L_NO_CORRECTION, NULL) == -1) { m_label_free(new_sl); break; } if (label_to_str(new_sl, &ascii, M_LABEL, DEF_NAMES) != 0) { if (ascii) free(ascii); m_label_free(new_sl); break; } m_label_free(new_sl); (void) strlcpy(propbuf, ascii, proplen); free(ascii); #else (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); #endif /* HAVE_MLSLABEL */ } break; default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (literal) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); else zfs_nicenum(val, propbuf, proplen); break; case PROP_TYPE_STRING: (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); break; case PROP_TYPE_INDEX: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (zfs_prop_index_to_string(prop, val, &strval) != 0) return (-1); (void) strlcpy(propbuf, strval, proplen); break; default: abort(); } } get_source(zhp, src, source, statbuf, statlen); return (0); } /* * Utility function to get the given numeric property. Does no validation that * the given property is the appropriate type; should only be used with * hard-coded property types. */ uint64_t zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop) { char *source; uint64_t val; (void) get_numeric_property(zhp, prop, NULL, &source, &val); return (val); } int zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val) { char buf[64]; (void) snprintf(buf, sizeof (buf), "%llu", (longlong_t)val); return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf)); } /* * Similar to zfs_prop_get(), but returns the value as an integer. */ int zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value, zprop_source_t *src, char *statbuf, size_t statlen) { char *source; /* * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) { return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE, dgettext(TEXT_DOMAIN, "cannot get property '%s'"), zfs_prop_to_name(prop))); } if (src) *src = ZPROP_SRC_NONE; if (get_numeric_property(zhp, prop, src, &source, value) != 0) return (-1); get_source(zhp, src, source, statbuf, statlen); return (0); } #ifdef HAVE_IDMAP static int idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser, char **domainp, idmap_rid_t *ridp) { idmap_get_handle_t *get_hdl = NULL; idmap_stat status; int err = EINVAL; if (idmap_get_create(&get_hdl) != IDMAP_SUCCESS) goto out; if (isuser) { err = idmap_get_sidbyuid(get_hdl, id, IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); } else { err = idmap_get_sidbygid(get_hdl, id, IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); } if (err == IDMAP_SUCCESS && idmap_get_mappings(get_hdl) == IDMAP_SUCCESS && status == IDMAP_SUCCESS) err = 0; else err = EINVAL; out: if (get_hdl) idmap_get_destroy(get_hdl); return (err); } #endif /* HAVE_IDMAP */ /* * convert the propname into parameters needed by kernel * Eg: userquota@ahrens -> ZFS_PROP_USERQUOTA, "", 126829 * Eg: userused@matt@domain -> ZFS_PROP_USERUSED, "S-1-123-456", 789 * Eg: groupquota@staff -> ZFS_PROP_GROUPQUOTA, "", 1234 * Eg: groupused@staff -> ZFS_PROP_GROUPUSED, "", 1234 */ static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp) { zfs_userquota_prop_t type; char *cp; boolean_t isuser; boolean_t isgroup; struct passwd *pw; struct group *gr; domain[0] = '\0'; /* Figure out the property type ({user|group}{quota|space}) */ for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) { if (strncmp(propname, zfs_userquota_prop_prefixes[type], strlen(zfs_userquota_prop_prefixes[type])) == 0) break; } if (type == ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); *typep = type; isuser = (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_USERUSED); isgroup = (type == ZFS_PROP_GROUPQUOTA || type == ZFS_PROP_GROUPUSED); cp = strchr(propname, '@') + 1; if (isuser && (pw = getpwnam(cp)) != NULL) { if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); *ridp = pw->pw_uid; } else if (isgroup && (gr = getgrnam(cp)) != NULL) { if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); *ridp = gr->gr_gid; } else if (strchr(cp, '@')) { #ifdef HAVE_IDMAP /* * It's a SID name (eg "user@domain") that needs to be * turned into S-1-domainID-RID. */ directory_error_t e; char *numericsid = NULL; char *end; if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); if (isuser) { e = directory_sid_from_user_name(NULL, cp, &numericsid); } else { e = directory_sid_from_group_name(NULL, cp, &numericsid); } if (e != NULL) { directory_error_free(e); return (ENOENT); } if (numericsid == NULL) return (ENOENT); cp = numericsid; (void) strlcpy(domain, cp, domainlen); cp = strrchr(domain, '-'); *cp = '\0'; cp++; errno = 0; *ridp = strtoull(cp, &end, 10); free(numericsid); if (errno != 0 || *end != '\0') return (EINVAL); #else return (ENOSYS); #endif /* HAVE_IDMAP */ } else { #ifdef HAVE_IDMAP /* It's a user/group ID (eg "12345"). */ uid_t id; idmap_rid_t rid; char *mapdomain; char *end; id = strtoul(cp, &end, 10); if (*end != '\0') return (EINVAL); if (id > MAXUID) { /* It's an ephemeral ID. */ if (idmap_id_to_numeric_domain_rid(id, isuser, &mapdomain, &rid) != 0) return (ENOENT); (void) strlcpy(domain, mapdomain, domainlen); *ridp = rid; } else { *ridp = id; } #else return (ENOSYS); #endif /* HAVE_IDMAP */ } return (0); } static int zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue, zfs_userquota_prop_t *typep) { int err; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; - (void) strncpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); err = userquota_propname_decode(propname, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), typep, zc.zc_value, sizeof (zc.zc_value), &zc.zc_guid); zc.zc_objset_type = *typep; if (err) return (err); err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_USERSPACE_ONE, &zc); if (err) return (err); *propvalue = zc.zc_cookie; return (0); } int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { zfs_userquota_prop_t type; return (zfs_prop_get_userquota_common(zhp, propname, propvalue, &type)); } int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal) { int err; uint64_t propvalue; zfs_userquota_prop_t type; err = zfs_prop_get_userquota_common(zhp, propname, &propvalue, &type); if (err) return (err); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)propvalue); } else if (propvalue == 0 && (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA)) { (void) strlcpy(propbuf, "none", proplen); } else { zfs_nicenum(propvalue, propbuf, proplen); } return (0); } -/* - * Returns the name of the given zfs handle. - */ -const char * -zfs_get_name(const zfs_handle_t *zhp) +int +zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue) { - return (zhp->zfs_name); -} + int err; + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + const char *snapname; -/* - * Returns the type of the given zfs handle. - */ -zfs_type_t -zfs_get_type(const zfs_handle_t *zhp) -{ - return (zhp->zfs_type); -} + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); -static int -zfs_do_list_ioctl(zfs_handle_t *zhp, int arg, zfs_cmd_t *zc) -{ - int rc; - uint64_t orig_cookie; + snapname = strchr(propname, '@') + 1; + if (strchr(snapname, '@')) { + (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + } else { + /* snapname is the short name, append it to zhp's fsname */ + char *cp; + + (void) strlcpy(zc.zc_value, zhp->zfs_name, + sizeof (zc.zc_value)); + cp = strchr(zc.zc_value, '@'); + if (cp != NULL) + *cp = '\0'; + (void) strlcat(zc.zc_value, "@", sizeof (zc.zc_value)); + (void) strlcat(zc.zc_value, snapname, sizeof (zc.zc_value)); + } - orig_cookie = zc->zc_cookie; -top: - (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); - rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc); + err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc); + if (err) + return (err); - if (rc == -1) { - switch (errno) { - case ENOMEM: - /* expand nvlist memory and try again */ - if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, zc) != 0) { - zcmd_free_nvlists(zc); - return (-1); - } - zc->zc_cookie = orig_cookie; - goto top; - /* - * An errno value of ESRCH indicates normal completion. - * If ENOENT is returned, then the underlying dataset - * has been removed since we obtained the handle. - */ - case ESRCH: - case ENOENT: - rc = 1; - break; - default: - rc = zfs_standard_error(zhp->zfs_hdl, errno, - dgettext(TEXT_DOMAIN, - "cannot iterate filesystems")); - break; - } - } - return (rc); + *propvalue = zc.zc_cookie; + return (0); } -/* - * Iterate over all child filesystems - */ int -zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) +zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, + char *propbuf, int proplen, boolean_t literal) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; - zfs_handle_t *nzhp; - int ret; - - if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) - return (0); + int err; + uint64_t propvalue; - if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) - return (-1); + err = zfs_prop_get_written_int(zhp, propname, &propvalue); - while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT, - &zc)) == 0) { - /* - * Silently ignore errors, as the only plausible explanation is - * that the pool has since been removed. - */ - if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, - &zc)) == NULL) { - continue; - } + if (err) + return (err); - if ((ret = func(nzhp, data)) != 0) { - zcmd_free_nvlists(&zc); - return (ret); - } + if (literal) { + (void) snprintf(propbuf, proplen, "%llu", (long long unsigned int)propvalue); + } else { + zfs_nicenum(propvalue, propbuf, proplen); } - zcmd_free_nvlists(&zc); - return ((ret < 0) ? ret : 0); + + return (0); } -/* - * Iterate over all snapshots - */ int -zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, - void *data) +zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, + uint64_t *usedp) { + int err; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; - zfs_handle_t *nzhp; - int ret; - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) - return (0); - - zc.zc_simple = simple; + (void) strlcpy(zc.zc_name, lastsnap->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_value, firstsnap->zfs_name, sizeof (zc.zc_value)); - if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) - return (-1); - while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT, - &zc)) == 0) { + err = ioctl(lastsnap->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_SNAPS, &zc); + if (err) + return (err); - if (simple) - nzhp = make_dataset_simple_handle_zc(zhp, &zc); - else - nzhp = make_dataset_handle_zc(zhp->zfs_hdl, &zc); - if (nzhp == NULL) - continue; + *usedp = zc.zc_cookie; - if ((ret = func(nzhp, data)) != 0) { - zcmd_free_nvlists(&zc); - return (ret); - } - } - zcmd_free_nvlists(&zc); - return ((ret < 0) ? ret : 0); + return (0); } /* - * Iterate over all children, snapshots and filesystems + * Returns the name of the given zfs handle. */ -int -zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) +const char * +zfs_get_name(const zfs_handle_t *zhp) { - int ret; - - if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0) - return (ret); + return (zhp->zfs_name); +} - return (zfs_iter_snapshots(zhp, B_FALSE, func, data)); +/* + * Returns the type of the given zfs handle. + */ +zfs_type_t +zfs_get_type(const zfs_handle_t *zhp) +{ + return (zhp->zfs_type); } /* * Is one dataset name a child dataset of another? * * Needs to handle these cases: * Dataset 1 "a/foo" "a/foo" "a/foo" "a/foo" * Dataset 2 "a/fo" "a/foobar" "a/bar/baz" "a/foo/bar" * Descendant? No. No. No. Yes. */ static boolean_t is_descendant(const char *ds1, const char *ds2) { size_t d1len = strlen(ds1); /* ds2 can't be a descendant if it's smaller */ if (strlen(ds2) < d1len) return (B_FALSE); /* otherwise, compare strings and verify that there's a '/' char */ return (ds2[d1len] == '/' && (strncmp(ds1, ds2, d1len) == 0)); } /* * Given a complete name, return just the portion that refers to the parent. - * Can return NULL if this is a pool. + * Will return -1 if there is no parent (path is just the name of the + * pool). */ static int parent_name(const char *path, char *buf, size_t buflen) { - char *loc; + char *slashp; - if ((loc = strrchr(path, '/')) == NULL) - return (-1); + (void) strlcpy(buf, path, buflen); - (void) strncpy(buf, path, MIN(buflen, loc - path)); - buf[loc - path] = '\0'; + if ((slashp = strrchr(buf, '/')) == NULL) + return (-1); + *slashp = '\0'; return (0); } /* * If accept_ancestor is false, then check to make sure that the given path has * a parent, and that it exists. If accept_ancestor is true, then find the * closest existing ancestor for the given path. In prefixlen return the * length of already existing prefix of the given path. We also fetch the * 'zoned' property, which is used to validate property settings when creating * new datasets. */ static int check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, boolean_t accept_ancestor, int *prefixlen) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char parent[ZFS_MAXNAMELEN]; char *slash; zfs_handle_t *zhp; char errbuf[1024]; uint64_t is_zoned; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* get parent, and check to see if this is just a pool */ if (parent_name(path, parent, sizeof (parent)) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* check to see if the pool exists */ if ((slash = strchr(parent, '/')) == NULL) slash = parent + strlen(parent); (void) strncpy(zc.zc_name, parent, slash - parent); zc.zc_name[slash - parent] = '\0'; if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 && errno == ENOENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool '%s'"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* check to see if the parent dataset exists */ while ((zhp = make_dataset_handle(hdl, parent)) == NULL) { if (errno == ENOENT && accept_ancestor) { /* * Go deeper to find an ancestor, give up on top level. */ if (parent_name(parent, parent, sizeof (parent)) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool '%s'"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } } else if (errno == ENOENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent does not exist")); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } else return (zfs_standard_error(hdl, errno, errbuf)); } is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); if (zoned != NULL) *zoned = is_zoned; /* we are in a non-global zone, but parent is in the global zone */ if (getzoneid() != GLOBAL_ZONEID && !is_zoned) { (void) zfs_standard_error(hdl, EPERM, errbuf); zfs_close(zhp); return (-1); } /* make sure parent is a filesystem */ if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent is not a filesystem")); (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (-1); } zfs_close(zhp); if (prefixlen != NULL) *prefixlen = strlen(parent); return (0); } /* * Finds whether the dataset of the given type(s) exists. */ boolean_t zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types) { zfs_handle_t *zhp; if (!zfs_validate_name(hdl, path, types, B_FALSE)) return (B_FALSE); /* * Try to get stats for the dataset, which will tell us if it exists. */ if ((zhp = make_dataset_handle(hdl, path)) != NULL) { int ds_type = zhp->zfs_type; zfs_close(zhp); if (types & ds_type) return (B_TRUE); } return (B_FALSE); } /* * Given a path to 'target', create all the ancestors between * the prefixlen portion of the path, and the target itself. * Fail if the initial prefixlen-ancestor does not already exist. */ int create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) { zfs_handle_t *h; char *cp; const char *opname; /* make sure prefix exists */ cp = target + prefixlen; if (*cp != '/') { assert(strchr(cp, '/') == NULL); h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); } else { *cp = '\0'; h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); *cp = '/'; } if (h == NULL) return (-1); zfs_close(h); /* * Attempt to create, mount, and share any ancestor filesystems, * up to the prefixlen-long one. */ for (cp = target + prefixlen + 1; (cp = strchr(cp, '/')); *cp = '/', cp++) { char *logstr; *cp = '\0'; h = make_dataset_handle(hdl, target); if (h) { /* it already exists, nothing to do here */ zfs_close(h); continue; } logstr = hdl->libzfs_log_str; hdl->libzfs_log_str = NULL; if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, NULL) != 0) { hdl->libzfs_log_str = logstr; opname = dgettext(TEXT_DOMAIN, "create"); goto ancestorerr; } hdl->libzfs_log_str = logstr; h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); if (h == NULL) { opname = dgettext(TEXT_DOMAIN, "open"); goto ancestorerr; } if (zfs_mount(h, NULL, 0) != 0) { opname = dgettext(TEXT_DOMAIN, "mount"); goto ancestorerr; } if (zfs_share(h) != 0) { opname = dgettext(TEXT_DOMAIN, "share"); goto ancestorerr; } zfs_close(h); } return (0); ancestorerr: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to %s ancestor '%s'"), opname, target); return (-1); } /* * Creates non-existing ancestors of the given path. */ int zfs_create_ancestors(libzfs_handle_t *hdl, const char *path) { int prefix; char *path_copy; int rc = 0; if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0) return (-1); if ((path_copy = strdup(path)) != NULL) { rc = create_parents(hdl, path_copy, prefix); free(path_copy); } if (path_copy == NULL || rc != 0) return (-1); return (0); } /* * Create a new filesystem or volume. */ int zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, nvlist_t *props) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); char errbuf[1024]; uint64_t zoned; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* validate the path, taking care to note the extended error message */ if (!zfs_validate_name(hdl, path, type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents exist */ if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0) return (-1); /* * The failure modes when creating a dataset of a different type over * one that already exists is a little strange. In particular, if you * try to create a dataset on top of an existing dataset, the ioctl() * will return ENOENT, not EEXIST. To prevent this from happening, we * first try to see if the dataset exists. */ (void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name)); if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset already exists")); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (type == ZFS_TYPE_VOLUME) zc.zc_objset_type = DMU_OST_ZVOL; else zc.zc_objset_type = DMU_OST_ZFS; if (props && (props = zfs_valid_proplist(hdl, type, props, zoned, NULL, errbuf)) == 0) return (-1); if (type == ZFS_TYPE_VOLUME) { /* * If we are creating a volume, the size and block size must * satisfy a few restraints. First, the blocksize must be a * valid block size between SPA_{MIN,MAX}BLOCKSIZE. Second, the * volsize must be a multiple of the block size, and cannot be * zero. */ if (props == NULL || nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing volume size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } if ((ret = nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &blocksize)) != 0) { if (ret == ENOENT) { blocksize = zfs_prop_default_numeric( ZFS_PROP_VOLBLOCKSIZE); } else { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing volume block size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } if (size == 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume size cannot be zero")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } if (size % blocksize != 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume size must be a multiple of volume block " "size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) return (-1); nvlist_free(props); /* create the dataset */ ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc); if (ret == 0 && type == ZFS_TYPE_VOLUME) { ret = zvol_create_link(hdl, path); if (ret) { (void) zfs_standard_error(hdl, errno, dgettext(TEXT_DOMAIN, "Volume successfully created, but device links " "were not created")); zcmd_free_nvlists(&zc); return (-1); } } zcmd_free_nvlists(&zc); /* check for failure */ if (ret != 0) { char parent[ZFS_MAXNAMELEN]; (void) parent_name(path, parent, sizeof (parent)); switch (errno) { case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such parent '%s'"), parent); return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EINVAL: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent '%s' is not a filesystem"), parent); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); case EDOM: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume block size must be power of 2 from " "%u to %uk"), (uint_t)SPA_MINBLOCKSIZE, (uint_t)SPA_MAXBLOCKSIZE >> 10); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to set this " "property or value")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); #ifdef _ILP32 case EOVERFLOW: /* * This platform can't address a volume this big. */ if (type == ZFS_TYPE_VOLUME) return (zfs_error(hdl, EZFS_VOLTOOBIG, errbuf)); #endif /* FALLTHROUGH */ default: return (zfs_standard_error(hdl, errno, errbuf)); } } return (0); } /* * Destroys the given dataset. The caller must make sure that the filesystem * isn't mounted, and that there are no active dependents. */ int zfs_destroy(zfs_handle_t *zhp, boolean_t defer) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (ZFS_IS_VOLUME(zhp)) { if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) return (-1); zc.zc_objset_type = DMU_OST_ZVOL; } else { zc.zc_objset_type = DMU_OST_ZFS; } zc.zc_defer_destroy = defer; if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY, &zc) != 0) { return (zfs_standard_error_fmt(zhp->zfs_hdl, errno, dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zfs_name)); } remove_mountpoint(zhp); return (0); } struct destroydata { - char *snapname; - boolean_t gotone; - boolean_t closezhp; + nvlist_t *nvl; + const char *snapname; }; static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; - boolean_t closezhp = dd->closezhp; int rv = 0; - (void) strlcpy(name, zhp->zfs_name, sizeof (name)); - (void) strlcat(name, "@", sizeof (name)); - (void) strlcat(name, dd->snapname, sizeof (name)); + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, dd->snapname); szhp = make_dataset_handle(zhp->zfs_hdl, name); if (szhp) { - dd->gotone = B_TRUE; + verify(nvlist_add_boolean(dd->nvl, name) == 0); zfs_close(szhp); } if (zhp->zfs_type == ZFS_TYPE_VOLUME) { (void) zvol_remove_link(zhp->zfs_hdl, name); /* * NB: this is simply a best-effort. We don't want to * return an error, because then we wouldn't visit all * the volumes. */ } - dd->closezhp = B_TRUE; - rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg); - if (closezhp) - zfs_close(zhp); + rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); + zfs_close(zhp); return (rv); } /* * Destroys all snapshots with the given name in zhp & descendants. */ int zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret; struct destroydata dd = { 0 }; dd.snapname = snapname; - (void) zfs_check_snap_cb(zhp, &dd); + verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); + (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); - if (!dd.gotone) { - return (zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, + if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) { + ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), - zhp->zfs_name, snapname)); + zhp->zfs_name, snapname); + } else { + ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer); } + nvlist_free(dd.nvl); + return (ret); +} + +/* + * Destroys all the snapshots named in the nvlist. They must be underneath + * the zhp (either snapshots of it, or snapshots of its descendants). + */ +int +zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer) +{ + int ret; + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, snaps) != 0) + return (-1); zc.zc_defer_destroy = defer; - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS, &zc); + ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS_NVL, &zc); if (ret != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot destroy '%s@%s'"), zc.zc_name, snapname); + "cannot destroy snapshots in %s"), zc.zc_name); switch (errno) { case EEXIST: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "snapshot is cloned")); return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf)); default: return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } return (0); } /* * Clones the given dataset. The target must be of the same type as the source. */ int zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char parent[ZFS_MAXNAMELEN]; int ret; char errbuf[1024]; libzfs_handle_t *hdl = zhp->zfs_hdl; zfs_type_t type; uint64_t zoned; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), target); - /* validate the target name */ + /* validate the target/clone name */ if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents exist */ if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0) return (-1); (void) parent_name(target, parent, sizeof (parent)); /* do the clone */ if (ZFS_IS_VOLUME(zhp)) { zc.zc_objset_type = DMU_OST_ZVOL; type = ZFS_TYPE_VOLUME; } else { zc.zc_objset_type = DMU_OST_ZFS; type = ZFS_TYPE_FILESYSTEM; } if (props) { if ((props = zfs_valid_proplist(hdl, type, props, zoned, zhp, errbuf)) == NULL) return (-1); if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { nvlist_free(props); return (-1); } nvlist_free(props); } (void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc); zcmd_free_nvlists(&zc); if (ret != 0) { switch (errno) { case ENOENT: /* * The parent doesn't exist. We should have caught this * above, but there may a race condition that has since * destroyed the parent. * * At this point, we don't know whether it's the source * that doesn't exist anymore, or whether the target * dataset doesn't exist. */ zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "no such parent '%s'"), parent); return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); case EXDEV: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "source and target pools differ")); return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET, errbuf)); default: return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } else if (ZFS_IS_VOLUME(zhp)) { ret = zvol_create_link(zhp->zfs_hdl, target); } return (ret); } typedef struct promote_data { char cb_mountpoint[MAXPATHLEN]; const char *cb_target; const char *cb_errbuf; uint64_t cb_pivot_txg; } promote_data_t; static int promote_snap_cb(zfs_handle_t *zhp, void *data) { promote_data_t *pd = data; zfs_handle_t *szhp; char snapname[MAXPATHLEN]; int rv = 0; /* We don't care about snapshots after the pivot point */ if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) { zfs_close(zhp); return (0); } /* Remove the device link if it's a zvol. */ if (ZFS_IS_VOLUME(zhp)) (void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name); /* Check for conflicting names */ (void) strlcpy(snapname, pd->cb_target, sizeof (snapname)); (void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname)); szhp = make_dataset_handle(zhp->zfs_hdl, snapname); if (szhp != NULL) { zfs_close(szhp); zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "snapshot name '%s' from origin \n" "conflicts with '%s' from target"), zhp->zfs_name, snapname); rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf); } zfs_close(zhp); return (rv); } static int promote_snap_done_cb(zfs_handle_t *zhp, void *data) { promote_data_t *pd = data; /* We don't care about snapshots after the pivot point */ if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) { /* Create the device link if it's a zvol. */ if (ZFS_IS_VOLUME(zhp)) (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); } zfs_close(zhp); return (0); } /* * Promotes the given clone fs to be the clone parent. */ int zfs_promote(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char parent[MAXPATHLEN]; char *cp; int ret; zfs_handle_t *pzhp; promote_data_t pd; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot promote '%s'"), zhp->zfs_name); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshots can not be promoted")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } (void) strlcpy(parent, zhp->zfs_dmustats.dds_origin, sizeof (parent)); if (parent[0] == '\0') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not a cloned filesystem")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } cp = strchr(parent, '@'); *cp = '\0'; /* Walk the snapshots we will be moving */ pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); if (pzhp == NULL) return (-1); pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG); zfs_close(pzhp); pd.cb_target = zhp->zfs_name; pd.cb_errbuf = errbuf; pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET); if (pzhp == NULL) return (-1); (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint, sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE); ret = zfs_iter_snapshots(pzhp, B_FALSE, promote_snap_cb, &pd); if (ret != 0) { zfs_close(pzhp); return (-1); } /* issue the ioctl */ (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); if (ret != 0) { int save_errno = errno; (void) zfs_iter_snapshots(pzhp, B_FALSE, promote_snap_done_cb, &pd); zfs_close(pzhp); switch (save_errno) { case EEXIST: /* * There is a conflicting snapshot name. We * should have caught this above, but they could * have renamed something in the mean time. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "conflicting snapshot '%s' from parent '%s'"), zc.zc_string, parent); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); default: return (zfs_standard_error(hdl, save_errno, errbuf)); } } else { (void) zfs_iter_snapshots(zhp, B_FALSE, promote_snap_done_cb, &pd); } zfs_close(pzhp); return (ret); } struct createdata { const char *cd_snapname; int cd_ifexists; }; static int zfs_create_link_cb(zfs_handle_t *zhp, void *arg) { struct createdata *cd = arg; int ret; if (zhp->zfs_type == ZFS_TYPE_VOLUME) { char name[MAXPATHLEN]; (void) strlcpy(name, zhp->zfs_name, sizeof (name)); (void) strlcat(name, "@", sizeof (name)); (void) strlcat(name, cd->cd_snapname, sizeof (name)); (void) zvol_create_link_common(zhp->zfs_hdl, name, cd->cd_ifexists); /* * NB: this is simply a best-effort. We don't want to * return an error, because then we wouldn't visit all * the volumes. */ } ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd); zfs_close(zhp); return (ret); } /* * Takes a snapshot of the given dataset. */ int zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, nvlist_t *props) { const char *delim; char parent[ZFS_MAXNAMELEN]; zfs_handle_t *zhp; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot snapshot '%s'"), path); /* validate the target name */ if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); if (props) { if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, props, B_FALSE, NULL, errbuf)) == NULL) return (-1); if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { nvlist_free(props); return (-1); } nvlist_free(props); } /* make sure the parent exists and is of the appropriate type */ delim = strchr(path, '@'); (void) strncpy(parent, path, delim - path); parent[delim - path] = '\0'; if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { zcmd_free_nvlists(&zc); return (-1); } (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value)); if (ZFS_IS_VOLUME(zhp)) zc.zc_objset_type = DMU_OST_ZVOL; else zc.zc_objset_type = DMU_OST_ZFS; zc.zc_cookie = recursive; ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc); zcmd_free_nvlists(&zc); /* * if it was recursive, the one that actually failed will be in * zc.zc_name. */ if (ret != 0) (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value); if (ret == 0 && recursive) { struct createdata cd; cd.cd_snapname = delim + 1; cd.cd_ifexists = B_FALSE; (void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd); } if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) { ret = zvol_create_link(zhp->zfs_hdl, path); if (ret != 0) { (void) zfs_standard_error(hdl, errno, dgettext(TEXT_DOMAIN, "Volume successfully snapshotted, but device links " "were not created")); zfs_close(zhp); return (-1); } } if (ret != 0) (void) zfs_standard_error(hdl, errno, errbuf); zfs_close(zhp); return (ret); } /* * Destroy any more recent snapshots. We invoke this callback on any dependents * of the snapshot first. If the 'cb_dependent' member is non-zero, then this * is a dependent and we should just destroy it without checking the transaction * group. */ typedef struct rollback_data { const char *cb_target; /* the snapshot */ uint64_t cb_create; /* creation time reference */ boolean_t cb_error; boolean_t cb_dependent; boolean_t cb_force; } rollback_data_t; static int rollback_destroy(zfs_handle_t *zhp, void *data) { rollback_data_t *cbp = data; if (!cbp->cb_dependent) { if (strcmp(zhp->zfs_name, cbp->cb_target) != 0 && zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { char *logstr; cbp->cb_dependent = B_TRUE; cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, rollback_destroy, cbp); cbp->cb_dependent = B_FALSE; logstr = zhp->zfs_hdl->libzfs_log_str; zhp->zfs_hdl->libzfs_log_str = NULL; cbp->cb_error |= zfs_destroy(zhp, B_FALSE); zhp->zfs_hdl->libzfs_log_str = logstr; } } else { /* We must destroy this clone; first unmount it */ prop_changelist_t *clp; clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, cbp->cb_force ? MS_FORCE: 0); if (clp == NULL || changelist_prefix(clp) != 0) { cbp->cb_error = B_TRUE; zfs_close(zhp); return (0); } if (zfs_destroy(zhp, B_FALSE) != 0) cbp->cb_error = B_TRUE; else changelist_remove(clp, zhp->zfs_name); (void) changelist_postfix(clp); changelist_free(clp); } zfs_close(zhp); return (0); } /* * Given a dataset, rollback to a specific snapshot, discarding any * data changes since then and making it the active dataset. * * Any snapshots more recent than the target are destroyed, along with * their dependents. */ int zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) { rollback_data_t cb = { 0 }; int err; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; boolean_t restore_resv = 0; uint64_t old_volsize = 0, new_volsize; zfs_prop_t resv_prop = { 0 }; assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM || zhp->zfs_type == ZFS_TYPE_VOLUME); /* * Destroy all recent snapshots and its dependends. */ cb.cb_force = force; cb.cb_target = snap->zfs_name; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); (void) zfs_iter_children(zhp, rollback_destroy, &cb); if (cb.cb_error) return (-1); /* * Now that we have verified that the snapshot is the latest, * rollback to the given snapshot. */ if (zhp->zfs_type == ZFS_TYPE_VOLUME) { if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) return (-1); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); restore_resv = (old_volsize == zfs_prop_get_int(zhp, resv_prop)); } (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (ZFS_IS_VOLUME(zhp)) zc.zc_objset_type = DMU_OST_ZVOL; else zc.zc_objset_type = DMU_OST_ZFS; /* * We rely on zfs_iter_children() to verify that there are no * newer snapshots for the given dataset. Therefore, we can * simply pass the name on to the ioctl() call. There is still * an unlikely race condition where the user has taken a * snapshot since we verified that this was the most recent. * */ if ((err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_ROLLBACK, &zc)) != 0) { (void) zfs_standard_error_fmt(zhp->zfs_hdl, errno, dgettext(TEXT_DOMAIN, "cannot rollback '%s'"), zhp->zfs_name); return (err); } /* * For volumes, if the pre-rollback volsize matched the pre- * rollback reservation and the volsize has changed then set * the reservation property to the post-rollback volsize. * Make a new handle since the rollback closed the dataset. */ if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { if ((err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name))) { zfs_close(zhp); return (err); } if (restore_resv) { new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (old_volsize != new_volsize) err = zfs_prop_set_int(zhp, resv_prop, new_volsize); } zfs_close(zhp); } return (err); } -/* - * Iterate over all dependents for a given dataset. This includes both - * hierarchical dependents (children) and data dependents (snapshots and - * clones). The bulk of the processing occurs in get_dependents() in - * libzfs_graph.c. - */ -int -zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion, - zfs_iter_f func, void *data) -{ - char **dependents; - size_t count; - int i; - zfs_handle_t *child; - int ret = 0; - - if (get_dependents(zhp->zfs_hdl, allowrecursion, zhp->zfs_name, - &dependents, &count) != 0) - return (-1); - - for (i = 0; i < count; i++) { - if ((child = make_dataset_handle(zhp->zfs_hdl, - dependents[i])) == NULL) - continue; - - if ((ret = func(child, data)) != 0) - break; - } - - for (i = 0; i < count; i++) - free(dependents[i]); - free(dependents); - - return (ret); -} - /* * Renames the given dataset. */ int zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) { int ret; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char *delim; prop_changelist_t *cl = NULL; zfs_handle_t *zhrp = NULL; char *parentname = NULL; char parent[ZFS_MAXNAMELEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; /* if we have the same exact name, just return success */ if (strcmp(zhp->zfs_name, target) == 0) return (0); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename to '%s'"), target); /* * Make sure the target name is valid */ if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { if ((strchr(target, '@') == NULL) || *target == '@') { /* * Snapshot target name is abbreviated, * reconstruct full dataset name */ (void) strlcpy(parent, zhp->zfs_name, sizeof (parent)); delim = strchr(parent, '@'); if (strchr(target, '@') == NULL) *(++delim) = '\0'; else *delim = '\0'; (void) strlcat(parent, target, sizeof (parent)); target = parent; } else { /* * Make sure we're renaming within the same dataset. */ delim = strchr(target, '@'); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '@') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshots must be part of same " "dataset")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } } if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } else { if (recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "recursive rename must be a snapshot")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents */ if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0) return (-1); /* make sure we're in the same pool */ verify((delim = strchr(target, '/')) != NULL); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '/') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "datasets must be within same pool")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } /* new name cannot be a child of the current dataset name */ if (is_descendant(zhp->zfs_name, target)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "New dataset name cannot be a descendant of " "current dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } } (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name); if (getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is used in a non-global zone")); return (zfs_error(hdl, EZFS_ZONED, errbuf)); } if (recursive) { struct destroydata dd; parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); if (parentname == NULL) { ret = -1; goto error; } delim = strchr(parentname, '@'); *delim = '\0'; zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); if (zhrp == NULL) { ret = -1; goto error; } dd.snapname = delim + 1; - dd.gotone = B_FALSE; - dd.closezhp = B_TRUE; /* We remove any zvol links prior to renaming them */ ret = zfs_iter_filesystems(zhrp, zfs_check_snap_cb, &dd); if (ret) { goto error; } } else { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL) return (-1); if (changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); (void) zfs_error(hdl, EZFS_ZONED, errbuf); ret = -1; goto error; } if ((ret = changelist_prefix(cl)) != 0) goto error; } if (ZFS_IS_VOLUME(zhp)) zc.zc_objset_type = DMU_OST_ZVOL; else zc.zc_objset_type = DMU_OST_ZFS; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); zc.zc_cookie = recursive; if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) { /* * if it was recursive, the one that actually failed will * be in zc.zc_name */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zc.zc_name); if (recursive && errno == EEXIST) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "a child dataset already has a snapshot " "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); } /* * On failure, we still want to remount any filesystems that * were previously mounted, so we don't alter the system state. */ if (recursive) { struct createdata cd; /* only create links for datasets that had existed */ cd.cd_snapname = delim + 1; cd.cd_ifexists = B_TRUE; (void) zfs_iter_filesystems(zhrp, zfs_create_link_cb, &cd); } else { (void) changelist_postfix(cl); } } else { if (recursive) { struct createdata cd; /* only create links for datasets that had existed */ cd.cd_snapname = strchr(target, '@') + 1; cd.cd_ifexists = B_TRUE; ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb, &cd); } else { changelist_rename(cl, zfs_get_name(zhp), target); ret = changelist_postfix(cl); } } error: if (parentname) { free(parentname); } if (zhrp) { zfs_close(zhrp); } if (cl) { changelist_free(cl); } return (ret); } /* * Given a zvol dataset, issue the ioctl to create the appropriate minor node, * and wait briefly for udev to create the /dev link. */ int zvol_create_link(libzfs_handle_t *hdl, const char *dataset) { return (zvol_create_link_common(hdl, dataset, B_FALSE)); } static int zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char path[MAXPATHLEN]; int error; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); /* * Issue the appropriate ioctl. */ if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) { switch (errno) { case EEXIST: /* * Silently ignore the case where the link already * exists. This allows 'zfs volinit' to be run multiple * times without errors. */ return (0); case ENOENT: /* * Dataset does not exist in the kernel. If we * don't care (see zfs_rename), then ignore the * error quietly. */ if (ifexists) { return (0); } /* FALLTHROUGH */ default: return (zfs_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot create device links " "for '%s'"), dataset)); } } /* * Wait up to 10 seconds for udev to create the device. */ (void) snprintf(path, sizeof (path), "%s/%s", ZVOL_DIR, dataset); error = zpool_label_disk_wait(path, 10000); if (error) (void) printf(gettext("%s may not be immediately " "available\n"), path); return (0); } /* * Remove a minor node for the given zvol and the associated /dev links. */ int zvol_remove_link(libzfs_handle_t *hdl, const char *dataset) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int timeout = 3000; /* in milliseconds */ int error = 0; int i; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); /* * Due to concurrent updates by udev the device may be reported as * busy. In this case don't immediately fail. Instead briefly delay * and retry the ioctl() which is now likely to succeed. If unable * remove the link after timeout milliseconds return the failure. */ for (i = 0; i < timeout; i++) { error = ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc); if (error && errno == EBUSY) { usleep(1000); continue; } else { break; } } if (error) { switch (errno) { case ENXIO: /* * Silently ignore the case where the link no longer * exists, so that 'zfs volfini' can be run multiple * times without errors. */ return (0); default: return (zfs_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot remove device " "links for '%s': %s"), dataset, strerror(errno))); } } return (0); } nvlist_t * zfs_get_user_props(zfs_handle_t *zhp) { return (zhp->zfs_user_props); } /* * This function is used by 'zfs list' to determine the exact set of columns to * display, and their maximum widths. This does two main things: * * - If this is a list of all properties, then expand the list to include * all native properties, and set a flag so that for each dataset we look * for new unique user properties and add them to the list. * * - For non fixed-width properties, keep track of the maximum width seen * so that we can size the column appropriately. If the user has * requested received property values, we also need to compute the width * of the RECEIVED column. */ int zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received) { libzfs_handle_t *hdl = zhp->zfs_hdl; zprop_list_t *entry; zprop_list_t **last, **start; nvlist_t *userprops, *propval; nvpair_t *elem; char *strval; char buf[ZFS_MAXPROPLEN]; if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0) return (-1); userprops = zfs_get_user_props(zhp); entry = *plp; if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) { /* * Go through and add any user properties as necessary. We * start by incrementing our list pointer to the first * non-native property. */ start = plp; while (*start != NULL) { if ((*start)->pl_prop == ZPROP_INVAL) break; start = &(*start)->pl_next; } elem = NULL; while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) { /* * See if we've already found this property in our list. */ for (last = start; *last != NULL; last = &(*last)->pl_next) { if (strcmp((*last)->pl_user_prop, nvpair_name(elem)) == 0) break; } if (*last == NULL) { if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL || ((entry->pl_user_prop = zfs_strdup(hdl, nvpair_name(elem)))) == NULL) { free(entry); return (-1); } entry->pl_prop = ZPROP_INVAL; entry->pl_width = strlen(nvpair_name(elem)); entry->pl_all = B_TRUE; *last = entry; } } } /* * Now go through and check the width of any non-fixed columns */ for (entry = *plp; entry != NULL; entry = entry->pl_next) { if (entry->pl_fixed) continue; if (entry->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, entry->pl_prop, buf, sizeof (buf), NULL, NULL, 0, B_FALSE) == 0) { if (strlen(buf) > entry->pl_width) entry->pl_width = strlen(buf); } if (received && zfs_prop_get_recvd(zhp, zfs_prop_to_name(entry->pl_prop), buf, sizeof (buf), B_FALSE) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } else { if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop, &propval) == 0) { verify(nvlist_lookup_string(propval, ZPROP_VALUE, &strval) == 0); if (strlen(strval) > entry->pl_width) entry->pl_width = strlen(strval); } if (received && zfs_prop_get_recvd(zhp, entry->pl_user_prop, buf, sizeof (buf), B_FALSE) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } } return (0); } void zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) { nvpair_t *curr; /* * Keep a reference to the props-table against which we prune the * properties. */ zhp->zfs_props_table = props; curr = nvlist_next_nvpair(zhp->zfs_props, NULL); while (curr) { zfs_prop_t zfs_prop = zfs_name_to_prop(nvpair_name(curr)); nvpair_t *next = nvlist_next_nvpair(zhp->zfs_props, curr); /* * User properties will result in ZPROP_INVAL, and since we * only know how to prune standard ZFS properties, we always * leave these in the list. This can also happen if we * encounter an unknown DSL property (when running older * software, for example). */ if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE) (void) nvlist_remove(zhp->zfs_props, nvpair_name(curr), nvpair_type(curr)); curr = next; } } static int zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path, zfs_smb_acl_op_t cmd, char *resource1, char *resource2) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; nvlist_t *nvlist = NULL; int error; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value)); zc.zc_cookie = (uint64_t)cmd; if (cmd == ZFS_SMB_ACL_RENAME) { if (nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (-1); } } switch (cmd) { case ZFS_SMB_ACL_ADD: case ZFS_SMB_ACL_REMOVE: (void) strlcpy(zc.zc_string, resource1, sizeof (zc.zc_string)); break; case ZFS_SMB_ACL_RENAME: if (nvlist_add_string(nvlist, ZFS_SMB_ACL_SRC, resource1) != 0) { (void) no_memory(hdl); return (-1); } if (nvlist_add_string(nvlist, ZFS_SMB_ACL_TARGET, resource2) != 0) { (void) no_memory(hdl); return (-1); } if (zcmd_write_src_nvlist(hdl, &zc, nvlist) != 0) { nvlist_free(nvlist); return (-1); } break; case ZFS_SMB_ACL_PURGE: break; default: return (-1); } error = ioctl(hdl->libzfs_fd, ZFS_IOC_SMB_ACL, &zc); if (nvlist) nvlist_free(nvlist); return (error); } int zfs_smb_acl_add(libzfs_handle_t *hdl, char *dataset, char *path, char *resource) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_ADD, resource, NULL)); } int zfs_smb_acl_remove(libzfs_handle_t *hdl, char *dataset, char *path, char *resource) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_REMOVE, resource, NULL)); } int zfs_smb_acl_purge(libzfs_handle_t *hdl, char *dataset, char *path) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_PURGE, NULL, NULL)); } int zfs_smb_acl_rename(libzfs_handle_t *hdl, char *dataset, char *path, char *oldname, char *newname) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_RENAME, oldname, newname)); } int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, zfs_userspace_cb_t func, void *arg) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int error; zfs_useracct_t buf[100]; - (void) strncpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_objset_type = type; zc.zc_nvlist_dst = (uintptr_t)buf; /* CONSTCOND */ while (1) { zfs_useracct_t *zua = buf; zc.zc_nvlist_dst_size = sizeof (buf); error = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_USERSPACE_MANY, &zc); if (error || zc.zc_nvlist_dst_size == 0) break; while (zc.zc_nvlist_dst_size > 0) { error = func(arg, zua->zu_domain, zua->zu_rid, zua->zu_space); if (error != 0) return (error); zua++; zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t); } } return (error); } int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive, boolean_t temphold, boolean_t enoent_ok, int cleanup_fd, uint64_t dsobj, uint64_t createtxg) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; ASSERT(!recursive || dsobj == 0); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) >= sizeof (zc.zc_string)) return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); zc.zc_cookie = recursive; zc.zc_temphold = temphold; zc.zc_cleanup_fd = cleanup_fd; zc.zc_sendobj = dsobj; zc.zc_createtxg = createtxg; if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) { char errbuf[ZFS_MAXNAMELEN+32]; /* * if it was recursive, the one that actually failed will be in * zc.zc_name. */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold '%s@%s'"), zc.zc_name, snapname); switch (errno) { case E2BIG: /* * Temporary tags wind up having the ds object id * prepended. So even if we passed the length check * above, it's still possible for the tag to wind * up being slightly too long. */ return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); case EINVAL: return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); case EEXIST: return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf)); case ENOENT: if (enoent_ok) return (ENOENT); /* FALLTHROUGH */ default: return (zfs_standard_error_fmt(hdl, errno, errbuf)); } } return (0); } int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) >= sizeof (zc.zc_string)) return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); zc.zc_cookie = recursive; if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) { char errbuf[ZFS_MAXNAMELEN+32]; /* * if it was recursive, the one that actually failed will be in * zc.zc_name. */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release '%s' from '%s@%s'"), tag, zc.zc_name, snapname); switch (errno) { case ESRCH: return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); case EINVAL: return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); default: return (zfs_standard_error_fmt(hdl, errno, errbuf)); } } return (0); } int zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; int err = 0; char errbuf[ZFS_MAXNAMELEN+32]; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); tryagain: nvbuf = malloc(nvsz); if (nvbuf == NULL) { err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); goto out; } zc.zc_nvlist_dst_size = nvsz; zc.zc_nvlist_dst = (uintptr_t)nvbuf; (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); if (ioctl(hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); switch (errno) { case ENOMEM: free(nvbuf); nvsz = zc.zc_nvlist_dst_size; goto tryagain; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error_fmt(hdl, errno, errbuf); break; } } else { /* success */ int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); if (rc) { (void) snprintf(errbuf, sizeof (errbuf), dgettext( TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); err = zfs_standard_error_fmt(hdl, rc, errbuf); } } free(nvbuf); out: return (err); } int zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; char *nvbuf; char errbuf[ZFS_MAXNAMELEN+32]; size_t nvsz; int err; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); assert(err == 0); nvbuf = malloc(nvsz); err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); assert(err == 0); zc.zc_nvlist_src_size = nvsz; zc.zc_nvlist_src = (uintptr_t)nvbuf; zc.zc_perm_action = un; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"), zc.zc_name); switch (errno) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error_fmt(hdl, errno, errbuf); break; } } free(nvbuf); return (err); } int zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; int err = 0; char errbuf[ZFS_MAXNAMELEN+32]; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); tryagain: nvbuf = malloc(nvsz); if (nvbuf == NULL) { err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); goto out; } zc.zc_nvlist_dst_size = nvsz; zc.zc_nvlist_dst = (uintptr_t)nvbuf; (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), zc.zc_name); switch (errno) { case ENOMEM: free(nvbuf); nvsz = zc.zc_nvlist_dst_size; goto tryagain; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error_fmt(hdl, errno, errbuf); break; } } else { /* success */ int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); if (rc) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), zc.zc_name); err = zfs_standard_error_fmt(hdl, rc, errbuf); } } free(nvbuf); out: return (err); } uint64_t zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) { uint64_t numdb; uint64_t nblocks, volblocksize; int ncopies; char *strval; if (nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0) ncopies = atoi(strval); else ncopies = 1; if (nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) volblocksize = ZVOL_DEFAULT_BLOCKSIZE; nblocks = volsize/volblocksize; /* start with metadnode L0-L6 */ numdb = 7; /* calculate number of indirects */ while (nblocks > 1) { nblocks += DNODES_PER_LEVEL - 1; nblocks /= DNODES_PER_LEVEL; numdb += nblocks; } numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1); volsize *= ncopies; /* * this is exactly DN_MAX_INDBLKSHIFT when metadata isn't * compressed, but in practice they compress down to about * 1100 bytes */ numdb *= 1ULL << DN_MAX_INDBLKSHIFT; volsize += numdb; return (volsize); } diff --git a/lib/libzfs/libzfs_iter.c b/lib/libzfs/libzfs_iter.c new file mode 100644 index 000000000000..8215d3cb17af --- /dev/null +++ b/lib/libzfs/libzfs_iter.c @@ -0,0 +1,468 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "libzfs_impl.h" + +int +zfs_iter_clones(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + nvlist_t *nvl = zfs_get_clones_nvl(zhp); + nvpair_t *pair; + + if (nvl == NULL) + return (0); + + for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) { + zfs_handle_t *clone = zfs_open(zhp->zfs_hdl, nvpair_name(pair), + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (clone != NULL) { + int err = func(clone, data); + if (err != 0) + return (err); + } + } + return (0); +} + +static int +zfs_do_list_ioctl(zfs_handle_t *zhp, int arg, zfs_cmd_t *zc) +{ + int rc; + uint64_t orig_cookie; + + orig_cookie = zc->zc_cookie; +top: + (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); + rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc); + + if (rc == -1) { + switch (errno) { + case ENOMEM: + /* expand nvlist memory and try again */ + if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, zc) != 0) { + zcmd_free_nvlists(zc); + return (-1); + } + zc->zc_cookie = orig_cookie; + goto top; + /* + * An errno value of ESRCH indicates normal completion. + * If ENOENT is returned, then the underlying dataset + * has been removed since we obtained the handle. + */ + case ESRCH: + case ENOENT: + rc = 1; + break; + default: + rc = zfs_standard_error(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, + "cannot iterate filesystems")); + break; + } + } + return (rc); +} + +/* + * Iterate over all child filesystems + */ +int +zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_handle_t *nzhp; + int ret; + + if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) + return (0); + + if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) + return (-1); + + while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT, + &zc)) == 0) { + /* + * Silently ignore errors, as the only plausible explanation is + * that the pool has since been removed. + */ + if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, + &zc)) == NULL) { + continue; + } + + if ((ret = func(nzhp, data)) != 0) { + zcmd_free_nvlists(&zc); + return (ret); + } + } + zcmd_free_nvlists(&zc); + return ((ret < 0) ? ret : 0); +} + +/* + * Iterate over all snapshots + */ +int +zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, + void *data) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_handle_t *nzhp; + int ret; + + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) + return (0); + + zc.zc_simple = simple; + + if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) + return (-1); + while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT, + &zc)) == 0) { + + if (simple) + nzhp = make_dataset_simple_handle_zc(zhp, &zc); + else + nzhp = make_dataset_handle_zc(zhp->zfs_hdl, &zc); + if (nzhp == NULL) + continue; + + if ((ret = func(nzhp, data)) != 0) { + zcmd_free_nvlists(&zc); + return (ret); + } + } + zcmd_free_nvlists(&zc); + return ((ret < 0) ? ret : 0); +} + +/* + * Routines for dealing with the sorted snapshot functionality + */ +typedef struct zfs_node { + zfs_handle_t *zn_handle; + avl_node_t zn_avlnode; +} zfs_node_t; + +static int +zfs_sort_snaps(zfs_handle_t *zhp, void *data) +{ + avl_tree_t *avl = data; + zfs_node_t *node; + zfs_node_t search; + + search.zn_handle = zhp; + node = avl_find(avl, &search, NULL); + if (node) { + /* + * If this snapshot was renamed while we were creating the + * AVL tree, it's possible that we already inserted it under + * its old name. Remove the old handle before adding the new + * one. + */ + zfs_close(node->zn_handle); + avl_remove(avl, node); + free(node); + } + + node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); + node->zn_handle = zhp; + avl_add(avl, node); + + return (0); +} + +static int +zfs_snapshot_compare(const void *larg, const void *rarg) +{ + zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; + zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; + uint64_t lcreate, rcreate; + + /* + * Sort them according to creation time. We use the hidden + * CREATETXG property to get an absolute ordering of snapshots. + */ + lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); + rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); + + if (lcreate < rcreate) + return (-1); + else if (lcreate > rcreate) + return (+1); + else + return (0); +} + +int +zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) +{ + int ret = 0; + zfs_node_t *node; + avl_tree_t avl; + void *cookie = NULL; + + avl_create(&avl, zfs_snapshot_compare, + sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); + + ret = zfs_iter_snapshots(zhp, B_FALSE, zfs_sort_snaps, &avl); + + for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) + ret |= callback(node->zn_handle, data); + + while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) + free(node); + + avl_destroy(&avl); + + return (ret); +} + +typedef struct { + char *ssa_first; + char *ssa_last; + boolean_t ssa_seenfirst; + boolean_t ssa_seenlast; + zfs_iter_f ssa_func; + void *ssa_arg; +} snapspec_arg_t; + +static int +snapspec_cb(zfs_handle_t *zhp, void *arg) { + snapspec_arg_t *ssa = arg; + char *shortsnapname; + int err = 0; + + if (ssa->ssa_seenlast) + return (0); + shortsnapname = zfs_strdup(zhp->zfs_hdl, + strchr(zfs_get_name(zhp), '@') + 1); + + if (!ssa->ssa_seenfirst && strcmp(shortsnapname, ssa->ssa_first) == 0) + ssa->ssa_seenfirst = B_TRUE; + + if (ssa->ssa_seenfirst) { + err = ssa->ssa_func(zhp, ssa->ssa_arg); + } else { + zfs_close(zhp); + } + + if (strcmp(shortsnapname, ssa->ssa_last) == 0) + ssa->ssa_seenlast = B_TRUE; + free(shortsnapname); + + return (err); +} + +/* + * spec is a string like "A,B%C,D" + * + * , where can be: + * (single snapshot) + * % (range of snapshots, inclusive) + * % (range of snapshots, starting with earliest) + * % (range of snapshots, ending with last) + * % (all snapshots) + * [,...] (comma separated list of the above) + * + * If a snapshot can not be opened, continue trying to open the others, but + * return ENOENT at the end. + */ +int +zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, + zfs_iter_f func, void *arg) +{ + char buf[ZFS_MAXNAMELEN]; + char *comma_separated, *cp; + int err = 0; + int ret = 0; + + (void) strlcpy(buf, spec_orig, sizeof (buf)); + cp = buf; + + while ((comma_separated = strsep(&cp, ",")) != NULL) { + char *pct = strchr(comma_separated, '%'); + if (pct != NULL) { + snapspec_arg_t ssa = { 0 }; + ssa.ssa_func = func; + ssa.ssa_arg = arg; + + if (pct == comma_separated) + ssa.ssa_seenfirst = B_TRUE; + else + ssa.ssa_first = comma_separated; + *pct = '\0'; + ssa.ssa_last = pct + 1; + + /* + * If there is a lastname specified, make sure it + * exists. + */ + if (ssa.ssa_last[0] != '\0') { + char snapname[ZFS_MAXNAMELEN]; + (void) snprintf(snapname, sizeof (snapname), + "%s@%s", zfs_get_name(fs_zhp), + ssa.ssa_last); + if (!zfs_dataset_exists(fs_zhp->zfs_hdl, + snapname, ZFS_TYPE_SNAPSHOT)) { + ret = ENOENT; + continue; + } + } + + err = zfs_iter_snapshots_sorted(fs_zhp, + snapspec_cb, &ssa); + if (ret == 0) + ret = err; + if (ret == 0 && (!ssa.ssa_seenfirst || + (ssa.ssa_last[0] != '\0' && !ssa.ssa_seenlast))) { + ret = ENOENT; + } + } else { + char snapname[ZFS_MAXNAMELEN]; + zfs_handle_t *snap_zhp; + (void) snprintf(snapname, sizeof (snapname), "%s@%s", + zfs_get_name(fs_zhp), comma_separated); + snap_zhp = make_dataset_handle(fs_zhp->zfs_hdl, + snapname); + if (snap_zhp == NULL) { + ret = ENOENT; + continue; + } + err = func(snap_zhp, arg); + if (ret == 0) + ret = err; + } + } + + return (ret); +} + +/* + * Iterate over all children, snapshots and filesystems + */ +int +zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + int ret; + + if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0) + return (ret); + + return (zfs_iter_snapshots(zhp, B_FALSE, func, data)); +} + + +typedef struct iter_stack_frame { + struct iter_stack_frame *next; + zfs_handle_t *zhp; +} iter_stack_frame_t; + +typedef struct iter_dependents_arg { + boolean_t first; + boolean_t allowrecursion; + iter_stack_frame_t *stack; + zfs_iter_f func; + void *data; +} iter_dependents_arg_t; + +static int +iter_dependents_cb(zfs_handle_t *zhp, void *arg) +{ + iter_dependents_arg_t *ida = arg; + int err; + boolean_t first = ida->first; + ida->first = B_FALSE; + + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { + err = zfs_iter_clones(zhp, iter_dependents_cb, ida); + } else { + iter_stack_frame_t isf; + iter_stack_frame_t *f; + + /* + * check if there is a cycle by seeing if this fs is already + * on the stack. + */ + for (f = ida->stack; f != NULL; f = f->next) { + if (f->zhp->zfs_dmustats.dds_guid == + zhp->zfs_dmustats.dds_guid) { + if (ida->allowrecursion) { + zfs_close(zhp); + return (0); + } else { + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, + "recursive dependency at '%s'"), + zfs_get_name(zhp)); + err = zfs_error(zhp->zfs_hdl, + EZFS_RECURSIVE, + dgettext(TEXT_DOMAIN, + "cannot determine dependent " + "datasets")); + zfs_close(zhp); + return (err); + } + } + } + + isf.zhp = zhp; + isf.next = ida->stack; + ida->stack = &isf; + err = zfs_iter_filesystems(zhp, iter_dependents_cb, ida); + if (err == 0) + err = zfs_iter_snapshots(zhp, B_FALSE, + iter_dependents_cb, ida); + ida->stack = isf.next; + } + if (!first && err == 0) + err = ida->func(zhp, ida->data); + return (err); +} + +int +zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion, + zfs_iter_f func, void *data) +{ + iter_dependents_arg_t ida; + ida.allowrecursion = allowrecursion; + ida.stack = NULL; + ida.func = func; + ida.data = data; + ida.first = B_TRUE; + return (iter_dependents_cb(zfs_handle_dup(zhp), &ida)); +} diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index bc623733d546..0702ba0a733b 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -1,3037 +1,3130 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * All rights reserved */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_fletcher.h" #include "libzfs_impl.h" #include #include #include /* in libzfs_dataset.c */ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); -static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t, +static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); static const zio_cksum_t zero_cksum = { { 0 } }; typedef struct dedup_arg { int inputfd; int outputfd; libzfs_handle_t *dedup_hdl; } dedup_arg_t; typedef struct dataref { uint64_t ref_guid; uint64_t ref_object; uint64_t ref_offset; } dataref_t; typedef struct dedup_entry { struct dedup_entry *dde_next; zio_cksum_t dde_chksum; uint64_t dde_prop; dataref_t dde_ref; } dedup_entry_t; #define MAX_DDT_PHYSMEM_PERCENT 20 #define SMALLEST_POSSIBLE_MAX_DDT_MB 128 typedef struct dedup_table { dedup_entry_t **dedup_hash_array; umem_cache_t *ddecache; uint64_t max_ddt_size; /* max dedup table size in bytes */ uint64_t cur_ddt_size; /* current dedup table size in bytes */ uint64_t ddt_count; int numhashbits; boolean_t ddt_full; } dedup_table_t; static int high_order_bit(uint64_t n) { int count; for (count = 0; n != 0; count++) n >>= 1; return (count); } static size_t ssread(void *buf, size_t len, FILE *stream) { size_t outlen; if ((outlen = fread(buf, len, 1, stream)) == 0) return (0); return (outlen); } static void ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp, zio_cksum_t *cs, uint64_t prop, dataref_t *dr) { dedup_entry_t *dde; if (ddt->cur_ddt_size >= ddt->max_ddt_size) { if (ddt->ddt_full == B_FALSE) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Dedup table full. Deduplication will continue " "with existing table entries")); ddt->ddt_full = B_TRUE; } return; } if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT)) != NULL) { assert(*ddepp == NULL); dde->dde_next = NULL; dde->dde_chksum = *cs; dde->dde_prop = prop; dde->dde_ref = *dr; *ddepp = dde; ddt->cur_ddt_size += sizeof (dedup_entry_t); ddt->ddt_count++; } } /* * Using the specified dedup table, do a lookup for an entry with * the checksum cs. If found, return the block's reference info * in *dr. Otherwise, insert a new entry in the dedup table, using * the reference information specified by *dr. * * return value: true - entry was found * false - entry was not found */ static boolean_t ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs, uint64_t prop, dataref_t *dr) { uint32_t hashcode; dedup_entry_t **ddepp; hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits); for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL; ddepp = &((*ddepp)->dde_next)) { if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) && (*ddepp)->dde_prop == prop) { *dr = (*ddepp)->dde_ref; return (B_TRUE); } } ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr); return (B_FALSE); } static int cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd) { fletcher_4_incremental_native(buf, len, zc); return (write(outfd, buf, len)); } /* * This function is started in a separate thread when the dedup option * has been requested. The main send thread determines the list of * snapshots to be included in the send stream and makes the ioctl calls * for each one. But instead of having the ioctl send the output to the * the output fd specified by the caller of zfs_send()), the * ioctl is told to direct the output to a pipe, which is read by the * alternate thread running THIS function. This function does the * dedup'ing by: * 1. building a dedup table (the DDT) * 2. doing checksums on each data block and inserting a record in the DDT * 3. looking for matching checksums, and * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever * a duplicate block is found. * The output of this function then goes to the output fd requested * by the caller of zfs_send(). */ static void * cksummer(void *arg) { dedup_arg_t *dda = arg; char *buf = malloc(1<<20); dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; struct drr_end *drre = &thedrr.drr_u.drr_end; struct drr_object *drro = &thedrr.drr_u.drr_object; struct drr_write *drrw = &thedrr.drr_u.drr_write; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; FILE *ofp; int outfd; dmu_replay_record_t wbr_drr = {0}; struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref; dedup_table_t ddt; zio_cksum_t stream_cksum; uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); uint64_t numbuckets; ddt.max_ddt_size = MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100, SMALLEST_POSSIBLE_MAX_DDT_MB<<20); numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t)); /* * numbuckets must be a power of 2. Increase number to * a power of 2 if necessary. */ if (!ISP2(numbuckets)) numbuckets = 1 << high_order_bit(numbuckets); ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *)); ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *); ddt.numhashbits = high_order_bit(numbuckets) - 1; ddt.ddt_full = B_FALSE; /* Initialize the write-by-reference block. */ wbr_drr.drr_type = DRR_WRITE_BYREF; wbr_drr.drr_payloadlen = 0; outfd = dda->outputfd; ofp = fdopen(dda->inputfd, "r"); while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) { switch (drr->drr_type) { case DRR_BEGIN: { int fflags; ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); /* set the DEDUP feature flag for this stream */ fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); fflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { int sz = drr->drr_payloadlen; if (sz > 1<<20) { free(buf); buf = malloc(sz); } (void) ssread(buf, sz, ofp); if (ferror(stdin)) perror("fread"); if (cksum_and_write(buf, sz, &stream_cksum, outfd) == -1) goto out; } break; } case DRR_END: { /* use the recalculated checksum */ ZIO_SET_CHECKSUM(&drre->drr_checksum, stream_cksum.zc_word[0], stream_cksum.zc_word[1], stream_cksum.zc_word[2], stream_cksum.zc_word[3]); if ((write(outfd, drr, sizeof (dmu_replay_record_t))) == -1) goto out; break; } case DRR_OBJECT: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (drro->drr_bonuslen > 0) { (void) ssread(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), ofp); if (cksum_and_write(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), &stream_cksum, outfd) == -1) goto out; } break; } case DRR_SPILL: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; (void) ssread(buf, drrs->drr_length, ofp); if (cksum_and_write(buf, drrs->drr_length, &stream_cksum, outfd) == -1) goto out; break; } case DRR_FREEOBJECTS: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; break; } case DRR_WRITE: { dataref_t dataref; (void) ssread(buf, drrw->drr_length, ofp); /* * Use the existing checksum if it's dedup-capable, * else calculate a SHA256 checksum for it. */ if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, zero_cksum) || !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { zio_cksum_t tmpsha256; zio_checksum_SHA256(buf, drrw->drr_length, &tmpsha256); drrw->drr_key.ddk_cksum.zc_word[0] = BE_64(tmpsha256.zc_word[0]); drrw->drr_key.ddk_cksum.zc_word[1] = BE_64(tmpsha256.zc_word[1]); drrw->drr_key.ddk_cksum.zc_word[2] = BE_64(tmpsha256.zc_word[2]); drrw->drr_key.ddk_cksum.zc_word[3] = BE_64(tmpsha256.zc_word[3]); drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; } dataref.ref_guid = drrw->drr_toguid; dataref.ref_object = drrw->drr_object; dataref.ref_offset = drrw->drr_offset; if (ddt_update(dda->dedup_hdl, &ddt, &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, &dataref)) { /* block already present in stream */ wbr_drrr->drr_object = drrw->drr_object; wbr_drrr->drr_offset = drrw->drr_offset; wbr_drrr->drr_length = drrw->drr_length; wbr_drrr->drr_toguid = drrw->drr_toguid; wbr_drrr->drr_refguid = dataref.ref_guid; wbr_drrr->drr_refobject = dataref.ref_object; wbr_drrr->drr_refoffset = dataref.ref_offset; wbr_drrr->drr_checksumtype = drrw->drr_checksumtype; wbr_drrr->drr_checksumflags = drrw->drr_checksumtype; wbr_drrr->drr_key.ddk_cksum = drrw->drr_key.ddk_cksum; wbr_drrr->drr_key.ddk_prop = drrw->drr_key.ddk_prop; if (cksum_and_write(&wbr_drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; } else { /* block not previously seen */ if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (cksum_and_write(buf, drrw->drr_length, &stream_cksum, outfd) == -1) goto out; } break; } case DRR_FREE: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; break; } default: (void) printf("INVALID record type 0x%x\n", drr->drr_type); /* should never happen, so assert */ assert(B_FALSE); } } out: umem_cache_destroy(ddt.ddecache); free(ddt.dedup_hash_array); free(buf); (void) fclose(ofp); return (NULL); } /* * Routines for dealing with the AVL tree of fs-nvlists */ typedef struct fsavl_node { avl_node_t fn_node; nvlist_t *fn_nvfs; char *fn_snapname; uint64_t fn_guid; } fsavl_node_t; static int fsavl_compare(const void *arg1, const void *arg2) { const fsavl_node_t *fn1 = arg1; const fsavl_node_t *fn2 = arg2; if (fn1->fn_guid > fn2->fn_guid) return (+1); else if (fn1->fn_guid < fn2->fn_guid) return (-1); else return (0); } /* * Given the GUID of a snapshot, find its containing filesystem and * (optionally) name. */ static nvlist_t * fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname) { fsavl_node_t fn_find; fsavl_node_t *fn; fn_find.fn_guid = snapguid; fn = avl_find(avl, &fn_find, NULL); if (fn) { if (snapname) *snapname = fn->fn_snapname; return (fn->fn_nvfs); } return (NULL); } static void fsavl_destroy(avl_tree_t *avl) { fsavl_node_t *fn; void *cookie; if (avl == NULL) return; cookie = NULL; while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL) free(fn); avl_destroy(avl); free(avl); } /* * Given an nvlist, produce an avl tree of snapshots, ordered by guid */ static avl_tree_t * fsavl_create(nvlist_t *fss) { avl_tree_t *fsavl; nvpair_t *fselem = NULL; if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL) return (NULL); avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t), offsetof(fsavl_node_t, fn_node)); while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) { nvlist_t *nvfs, *snaps; nvpair_t *snapelem = NULL; VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); while ((snapelem = nvlist_next_nvpair(snaps, snapelem)) != NULL) { fsavl_node_t *fn; uint64_t guid; VERIFY(0 == nvpair_value_uint64(snapelem, &guid)); if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) { fsavl_destroy(fsavl); return (NULL); } fn->fn_nvfs = nvfs; fn->fn_snapname = nvpair_name(snapelem); fn->fn_guid = guid; /* * Note: if there are multiple snaps with the * same GUID, we ignore all but one. */ if (avl_find(fsavl, fn, NULL) == NULL) avl_add(fsavl, fn); else free(fn); } } return (fsavl); } /* * Routines for dealing with the giant nvlist of fs-nvlists, etc. */ typedef struct send_data { uint64_t parent_fromsnap_guid; nvlist_t *parent_snaps; nvlist_t *fss; nvlist_t *snapprops; const char *fromsnap; const char *tosnap; boolean_t recursive; /* * The header nvlist is of the following format: * { * "tosnap" -> string * "fromsnap" -> string (if incremental) * "fss" -> { * id -> { * * "name" -> string (full name; for debugging) * "parentfromsnap" -> number (guid of fromsnap in parent) * * "props" -> { name -> value (only if set here) } * "snaps" -> { name (lastname) -> number (guid) } * "snapprops" -> { name (lastname) -> { name -> value } } * * "origin" -> number (guid) (if clone) * "sent" -> boolean (not on-disk) * } * } * } * */ } send_data_t; static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv); static int send_iterate_snap(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; uint64_t guid = zhp->zfs_dmustats.dds_guid; char *snapname; nvlist_t *nv; snapname = strrchr(zhp->zfs_name, '@')+1; VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid)); /* * NB: if there is no fromsnap here (it's a newly created fs in * an incremental replication), we will substitute the tosnap. */ if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) || (sd->parent_fromsnap_guid == 0 && sd->tosnap && strcmp(snapname, sd->tosnap) == 0)) { sd->parent_fromsnap_guid = guid; } VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv)); nvlist_free(nv); zfs_close(zhp); return (0); } static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv) { nvpair_t *elem = NULL; while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) { char *propname = nvpair_name(elem); zfs_prop_t prop = zfs_name_to_prop(propname); nvlist_t *propnv; if (!zfs_prop_user(propname)) { /* * Realistically, this should never happen. However, * we want the ability to add DSL properties without * needing to make incompatible version changes. We * need to ignore unknown properties to allow older * software to still send datasets containing these * properties, with the unknown properties elided. */ if (prop == ZPROP_INVAL) continue; if (zfs_prop_readonly(prop)) continue; } verify(nvpair_value_nvlist(elem, &propnv) == 0); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) { char *source; uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) continue; /* * May have no source before SPA_VERSION_RECVD_PROPS, * but is still modifiable. */ if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) == 0) { if ((strcmp(source, zhp->zfs_name) != 0) && (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } } else { char *source; if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) != 0) continue; if ((strcmp(source, zhp->zfs_name) != 0) && (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } if (zfs_prop_user(propname) || zfs_prop_get_type(prop) == PROP_TYPE_STRING) { char *value; verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_string(nv, propname, value)); } else { uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_uint64(nv, propname, value)); } } } /* * recursively generate nvlists describing datasets. See comment * for the data structure send_data_t above for description of contents * of the nvlist. */ static int send_iterate_fs(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; nvlist_t *nvfs, *nv; int rv = 0; uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; uint64_t guid = zhp->zfs_dmustats.dds_guid; char guidstring[64]; VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name)); VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap", sd->parent_fromsnap_guid)); if (zhp->zfs_dmustats.dds_origin[0]) { zfs_handle_t *origin = zfs_open(zhp->zfs_hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); if (origin == NULL) return (-1); VERIFY(0 == nvlist_add_uint64(nvfs, "origin", origin->zfs_dmustats.dds_guid)); } /* iterate over props */ VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); nvlist_free(nv); /* iterate over snaps, and set sd->parent_fromsnap_guid */ sd->parent_fromsnap_guid = 0; VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0)); (void) zfs_iter_snapshots(zhp, B_FALSE, send_iterate_snap, sd); VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps)); VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops)); nvlist_free(sd->parent_snaps); nvlist_free(sd->snapprops); /* add this fs to nvlist */ (void) snprintf(guidstring, sizeof (guidstring), "0x%llx", (longlong_t)guid); VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); nvlist_free(nvfs); /* iterate over children */ if (sd->recursive) rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); sd->parent_fromsnap_guid = parent_fromsnap_guid_save; zfs_close(zhp); return (rv); } static int gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp) { zfs_handle_t *zhp; send_data_t sd = { 0 }; int error; zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (EZFS_BADTYPE); VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0)); sd.fromsnap = fromsnap; sd.tosnap = tosnap; sd.recursive = recursive; if ((error = send_iterate_fs(zhp, &sd)) != 0) { nvlist_free(sd.fss); if (avlp != NULL) *avlp = NULL; *nvlp = NULL; return (error); } if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) { nvlist_free(sd.fss); *nvlp = NULL; return (EZFS_NOMEM); } *nvlp = sd.fss; return (0); } -/* - * Routines for dealing with the sorted snapshot functionality - */ -typedef struct zfs_node { - zfs_handle_t *zn_handle; - avl_node_t zn_avlnode; -} zfs_node_t; - -static int -zfs_sort_snaps(zfs_handle_t *zhp, void *data) -{ - avl_tree_t *avl = data; - zfs_node_t *node; - zfs_node_t search; - - search.zn_handle = zhp; - node = avl_find(avl, &search, NULL); - if (node) { - /* - * If this snapshot was renamed while we were creating the - * AVL tree, it's possible that we already inserted it under - * its old name. Remove the old handle before adding the new - * one. - */ - zfs_close(node->zn_handle); - avl_remove(avl, node); - free(node); - } - - node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); - node->zn_handle = zhp; - avl_add(avl, node); - - return (0); -} - -static int -zfs_snapshot_compare(const void *larg, const void *rarg) -{ - zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; - zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; - uint64_t lcreate, rcreate; - - /* - * Sort them according to creation time. We use the hidden - * CREATETXG property to get an absolute ordering of snapshots. - */ - lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); - rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); - - if (lcreate < rcreate) - return (-1); - else if (lcreate > rcreate) - return (+1); - else - return (0); -} - -int -zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) -{ - int ret = 0; - zfs_node_t *node; - avl_tree_t avl; - void *cookie = NULL; - - avl_create(&avl, zfs_snapshot_compare, - sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); - - ret = zfs_iter_snapshots(zhp, B_FALSE, zfs_sort_snaps, &avl); - - for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) - ret |= callback(node->zn_handle, data); - - while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) - free(node); - - avl_destroy(&avl); - - return (ret); -} - /* * Routines specific to "zfs send" */ typedef struct send_dump_data { /* these are all just the short snapname (the part after the @) */ const char *fromsnap; const char *tosnap; char prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose; + boolean_t verbose, dryrun, parsable; int outfd; boolean_t err; nvlist_t *fss; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; nvlist_t *debugnv; char holdtag[ZFS_MAXNAMELEN]; int cleanup_fd; + uint64_t size; } send_dump_data_t; +static int +estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, + boolean_t fromorigin, uint64_t *sizep) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + assert(fromsnap_obj == 0 || !fromorigin); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + zc.zc_obj = fromorigin; + zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); + zc.zc_fromobj = fromsnap_obj; + zc.zc_guid = 1; /* estimate flag */ + + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot estimate space for '%s'"), zhp->zfs_name); + + switch (errno) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not an earlier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + + case ENOENT: + if (zfs_dataset_exists(hdl, zc.zc_name, + ZFS_TYPE_SNAPSHOT)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source (@%s) does not exist"), + zc.zc_value); + } + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + + case EDQUOT: + case EFBIG: + case EIO: + case ENOLINK: + case ENOSPC: + case ENOSTR: + case ENXIO: + case EPIPE: + case ERANGE: + case EFAULT: + case EROFS: + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); + + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + + *sizep = zc.zc_objset_type; + + return (0); +} + /* * Dumps a backup of the given snapshot (incremental from fromsnap if it's not * NULL) to the file descriptor specified by outfd. */ static int dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, boolean_t fromorigin, int outfd, nvlist_t *debugnv) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *thisdbg; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); assert(fromsnap_obj == 0 || !fromorigin); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_cookie = outfd; zc.zc_obj = fromorigin; zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zc.zc_fromobj = fromsnap_obj; VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); if (fromsnap && fromsnap[0] != '\0') { VERIFY(0 == nvlist_add_string(thisdbg, "fromsnap", fromsnap)); } - if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) { + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno)); if (debugnv) { VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); } nvlist_free(thisdbg); switch (errno) { - case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source (@%s) does not exist"), zc.zc_value); } return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EDQUOT: case EFBIG: case EIO: case ENOLINK: case ENOSPC: case ENOSTR: case ENXIO: case EPIPE: case ERANGE: case EFAULT: case EROFS: zfs_error_aux(hdl, strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } } if (debugnv) VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); nvlist_free(thisdbg); return (0); } static int hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) { zfs_handle_t *pzhp; int error = 0; char *thissnap; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + if (sdd->dryrun) + return (0); + /* * zfs_send() only opens a cleanup_fd for sends that need it, * e.g. replication and doall. */ if (sdd->cleanup_fd == -1) return (0); thissnap = strchr(zhp->zfs_name, '@') + 1; *(thissnap - 1) = '\0'; pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); *(thissnap - 1) = '@'; /* * It's OK if the parent no longer exists. The send code will * handle that error. */ if (pzhp) { error = zfs_hold(pzhp, thissnap, sdd->holdtag, B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd, zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID), zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG)); zfs_close(pzhp); } return (error); } static int dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; char *thissnap; int err; - boolean_t isfromsnap, istosnap; + boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { err = hold_for_send(zhp, sdd); if (err == 0) { sdd->seenfrom = B_TRUE; (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); } else if (err == ENOENT) { err = 0; } zfs_close(zhp); return (err); } if (sdd->seento || !sdd->seenfrom) { zfs_close(zhp); return (0); } istosnap = (strcmp(sdd->tosnap, thissnap) == 0); if (istosnap) sdd->seento = B_TRUE; if (!sdd->doall && !isfromsnap && !istosnap) { if (sdd->replicate) { char *snapname; nvlist_t *snapprops; /* * Filter out all intermediate snapshots except origin * snapshots needed to replicate clones. */ nvlist_t *nvfs = fsavl_find(sdd->fsavl, zhp->zfs_dmustats.dds_guid, &snapname); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snapprops", &snapprops)); VERIFY(0 == nvlist_lookup_nvlist(snapprops, thissnap, &snapprops)); exclude = !nvlist_exists(snapprops, "is_clone_origin"); } else { exclude = B_TRUE; } } /* * If a filter function exists, call it to determine whether * this snapshot will be sent. */ if (exclude || (sdd->filter_cb != NULL && sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) { /* * This snapshot is filtered out. Don't send it, and don't * set prevsnap_obj, so it will be as if this snapshot didn't * exist, and the next accepted snapshot will be sent as * an incremental from the last accepted one, or as the * first (and full) snapshot in the case of a replication, * non-incremental send. */ zfs_close(zhp); return (0); } err = hold_for_send(zhp, sdd); if (err) { if (err == ENOENT) err = 0; zfs_close(zhp); return (err); } - /* send it */ + fromorigin = sdd->prevsnap[0] == '\0' && + (sdd->fromorigin || sdd->replicate); + if (sdd->verbose) { - (void) fprintf(stderr, "sending from @%s to %s\n", - sdd->prevsnap, zhp->zfs_name); + uint64_t size; + err = estimate_ioctl(zhp, sdd->prevsnap_obj, + fromorigin, &size); + + if (sdd->parsable) { + if (sdd->prevsnap[0] != '\0') { + (void) fprintf(stderr, "incremental\t%s\t%s", + sdd->prevsnap, zhp->zfs_name); + } else { + (void) fprintf(stderr, "full\t%s", + zhp->zfs_name); + } + } else { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "send from @%s to %s"), + sdd->prevsnap, zhp->zfs_name); + } + if (err == 0) { + if (sdd->parsable) { + (void) fprintf(stderr, "\t%llu\n", + (longlong_t)size); + } else { + char buf[16]; + zfs_nicenum(size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + " estimated size is %s\n"), buf); + } + sdd->size += size; + } else { + (void) fprintf(stderr, "\n"); + } } - err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, - sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate), - sdd->outfd, sdd->debugnv); + if (!sdd->dryrun) { + err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, + fromorigin, sdd->outfd, sdd->debugnv); + } (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); return (err); } static int dump_filesystem(zfs_handle_t *zhp, void *arg) { int rv = 0; send_dump_data_t *sdd = arg; boolean_t missingfrom = B_FALSE; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->tosnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { - (void) fprintf(stderr, "WARNING: " - "could not send %s@%s: does not exist\n", + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "WARNING: could not send %s@%s: does not exist\n"), zhp->zfs_name, sdd->tosnap); sdd->err = B_TRUE; return (0); } if (sdd->replicate && sdd->fromsnap) { /* * If this fs does not have fromsnap, and we're doing * recursive, we need to send a full stream from the * beginning (or an incremental from the origin if this * is a clone). If we're doing non-recursive, then let * them get the error. */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->fromsnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { missingfrom = B_TRUE; } } sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0; sdd->prevsnap_obj = 0; if (sdd->fromsnap == NULL || missingfrom) sdd->seenfrom = B_TRUE; rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); if (!sdd->seenfrom) { - (void) fprintf(stderr, + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s:\n" - "incremental source (%s@%s) does not exist\n", + "incremental source (%s@%s) does not exist\n"), zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); sdd->err = B_TRUE; } else if (!sdd->seento) { if (sdd->fromsnap) { - (void) fprintf(stderr, + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s:\n" "incremental source (%s@%s) " - "is not earlier than it\n", + "is not earlier than it\n"), zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); } else { - (void) fprintf(stderr, "WARNING: " - "could not send %s@%s: does not exist\n", + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "WARNING: " + "could not send %s@%s: does not exist\n"), zhp->zfs_name, sdd->tosnap); } sdd->err = B_TRUE; } return (rv); } static int dump_filesystems(zfs_handle_t *rzhp, void *arg) { send_dump_data_t *sdd = arg; nvpair_t *fspair; boolean_t needagain, progress; if (!sdd->replicate) return (dump_filesystem(rzhp, sdd)); /* Mark the clone origin snapshots. */ for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *nvfs; uint64_t origin_guid = 0; VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs)); (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid); if (origin_guid != 0) { char *snapname; nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, &snapname); if (origin_nv != NULL) { nvlist_t *snapprops; VERIFY(0 == nvlist_lookup_nvlist(origin_nv, "snapprops", &snapprops)); VERIFY(0 == nvlist_lookup_nvlist(snapprops, snapname, &snapprops)); VERIFY(0 == nvlist_add_boolean( snapprops, "is_clone_origin")); } } } again: needagain = progress = B_FALSE; for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { - nvlist_t *fslist; + nvlist_t *fslist, *parent_nv; char *fsname; zfs_handle_t *zhp; int err; uint64_t origin_guid = 0; + uint64_t parent_guid = 0; VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); if (nvlist_lookup_boolean(fslist, "sent") == 0) continue; VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); + (void) nvlist_lookup_uint64(fslist, "parentfromsnap", + &parent_guid); + + if (parent_guid != 0) { + parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL); + if (!nvlist_exists(parent_nv, "sent")) { + /* parent has not been sent; skip this one */ + needagain = B_TRUE; + continue; + } + } if (origin_guid != 0) { nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL); if (origin_nv != NULL && - nvlist_lookup_boolean(origin_nv, - "sent") == ENOENT) { + !nvlist_exists(origin_nv, "sent")) { /* * origin has not been sent yet; * skip this clone. */ needagain = B_TRUE; continue; } } zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); err = dump_filesystem(zhp, sdd); VERIFY(nvlist_add_boolean(fslist, "sent") == 0); progress = B_TRUE; zfs_close(zhp); if (err) return (err); } if (needagain) { assert(progress); goto again; } + + /* clean out the sent flags in case we reuse this fss */ + for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; + fspair = nvlist_next_nvpair(sdd->fss, fspair)) { + nvlist_t *fslist; + + VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); + (void) nvlist_remove_all(fslist, "sent"); + } + return (0); } /* * Generate a send stream for the dataset identified by the argument zhp. * * The content of the send stream is the snapshot identified by * 'tosnap'. Incremental streams are requested in two ways: * - from the snapshot identified by "fromsnap" (if non-null) or * - from the origin of the dataset identified by zhp, which must * be a clone. In this case, "fromsnap" is null and "fromorigin" * is TRUE. * * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM) * if "replicate" is set. If "doall" is set, dump all the intermediate * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall" * case too. If "props" is set, send properties. */ int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, - sendflags_t flags, int outfd, snapfilter_cb_t filter_func, + sendflags_t *flags, int outfd, snapfilter_cb_t filter_func, void *cb_arg, nvlist_t **debugnvp) { char errbuf[1024]; send_dump_data_t sdd = { 0 }; - int err; + int err = 0; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; boolean_t holdsnaps = B_FALSE; pthread_t tid; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot send '%s'"), zhp->zfs_name); if (fromsnap && fromsnap[0] == '\0') { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "zero-length incremental source")); return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); } if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { uint64_t version; version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); if (version >= ZPL_VERSION_SA) { featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; } } - if (zfs_spa_version(zhp, &spa_version) == 0 && + if (!flags->dryrun && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS && - (flags.doall || flags.replicate)) + (flags->doall || flags->replicate)) holdsnaps = B_TRUE; - if (flags.dedup) { + if (flags->dedup && !flags->dryrun) { featureflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd))) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); } dda.outputfd = outfd; dda.inputfd = pipefd[1]; dda.dedup_hdl = zhp->zfs_hdl; if ((err = pthread_create(&tid, NULL, cksummer, &dda))) { (void) close(pipefd[0]); (void) close(pipefd[1]); zfs_error_aux(zhp->zfs_hdl, strerror(errno)); return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED, errbuf)); } } - if (flags.replicate || flags.doall || flags.props) { + if (flags->replicate || flags->doall || flags->props) { dmu_replay_record_t drr = { 0 }; char *packbuf = NULL; size_t buflen = 0; zio_cksum_t zc = { { 0 } }; - if (flags.replicate || flags.props) { + if (flags->replicate || flags->props) { nvlist_t *hdrnv; VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); if (fromsnap) { VERIFY(0 == nvlist_add_string(hdrnv, "fromsnap", fromsnap)); } VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); - if (!flags.replicate) { + if (!flags->replicate) { VERIFY(0 == nvlist_add_boolean(hdrnv, "not_recursive")); } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, - fromsnap, tosnap, flags.replicate, &fss, &fsavl); + fromsnap, tosnap, flags->replicate, &fss, &fsavl); if (err) goto err_out; VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); err = nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR, 0); if (debugnvp) *debugnvp = hdrnv; else nvlist_free(hdrnv); if (err) { fsavl_destroy(fsavl); nvlist_free(fss); goto stderr_out; } } - /* write first begin record */ - drr.drr_type = DRR_BEGIN; - drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; - DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo, - DMU_COMPOUNDSTREAM); - DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo, - featureflags); - (void) snprintf(drr.drr_u.drr_begin.drr_toname, - sizeof (drr.drr_u.drr_begin.drr_toname), - "%s@%s", zhp->zfs_name, tosnap); - drr.drr_payloadlen = buflen; - err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); - - /* write header nvlist */ - if (err != -1 && packbuf != NULL) { - err = cksum_and_write(packbuf, buflen, &zc, outfd); - } - free(packbuf); - if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); - err = errno; - goto stderr_out; - } + if (!flags->dryrun) { + /* write first begin record */ + drr.drr_type = DRR_BEGIN; + drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; + DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin. + drr_versioninfo, DMU_COMPOUNDSTREAM); + DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin. + drr_versioninfo, featureflags); + (void) snprintf(drr.drr_u.drr_begin.drr_toname, + sizeof (drr.drr_u.drr_begin.drr_toname), + "%s@%s", zhp->zfs_name, tosnap); + drr.drr_payloadlen = buflen; + err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); + + /* write header nvlist */ + if (err != -1 && packbuf != NULL) { + err = cksum_and_write(packbuf, buflen, &zc, + outfd); + } + free(packbuf); + if (err == -1) { + fsavl_destroy(fsavl); + nvlist_free(fss); + err = errno; + goto stderr_out; + } - /* write end record */ - if (err != -1) { + /* write end record */ bzero(&drr, sizeof (drr)); drr.drr_type = DRR_END; drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { fsavl_destroy(fsavl); nvlist_free(fss); err = errno; goto stderr_out; } + + err = 0; } } /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; - if (flags.dedup) + if (flags->dedup) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; - sdd.replicate = flags.replicate; - sdd.doall = flags.doall; - sdd.fromorigin = flags.fromorigin; + sdd.replicate = flags->replicate; + sdd.doall = flags->doall; + sdd.fromorigin = flags->fromorigin; sdd.fss = fss; sdd.fsavl = fsavl; - sdd.verbose = flags.verbose; + sdd.verbose = flags->verbose; + sdd.parsable = flags->parsable; + sdd.dryrun = flags->dryrun; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) sdd.debugnv = *debugnvp; if (holdsnaps) { ++holdseq; (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag), ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); sdd.cleanup_fd = open(ZFS_DEV, O_RDWR); if (sdd.cleanup_fd < 0) { err = errno; goto stderr_out; } } else { sdd.cleanup_fd = -1; } + if (flags->verbose) { + /* + * Do a verbose no-op dry run to get all the verbose output + * before generating any data. Then do a non-verbose real + * run to generate the streams. + */ + sdd.dryrun = B_TRUE; + err = dump_filesystems(zhp, &sdd); + sdd.dryrun = flags->dryrun; + sdd.verbose = B_FALSE; + if (flags->parsable) { + (void) fprintf(stderr, "size\t%llu\n", + (longlong_t)sdd.size); + } else { + char buf[16]; + zfs_nicenum(sdd.size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "total estimated size is %s\n"), buf); + } + } err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); - if (flags.dedup) { + if (flags->dedup) { (void) close(pipefd[0]); (void) pthread_join(tid, NULL); } if (sdd.cleanup_fd != -1) { VERIFY(0 == close(sdd.cleanup_fd)); sdd.cleanup_fd = -1; } - if (flags.replicate || flags.doall || flags.props) { + if (!flags->dryrun && (flags->replicate || flags->doall || + flags->props)) { /* * write final end record. NB: want to do this even if * there was some error, because it might not be totally * failed. */ dmu_replay_record_t drr = { 0 }; drr.drr_type = DRR_END; if (write(outfd, &drr, sizeof (drr)) == -1) { return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } return (err || sdd.err); stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); - if (flags.dedup) { + if (flags->dedup) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); } return (err); } /* * Routines specific to "zfs recv" */ static int recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen, boolean_t byteswap, zio_cksum_t *zc) { char *cp = buf; int rv; int len = ilen; do { rv = read(fd, cp, len); cp += rv; len -= rv; } while (rv > 0); if (rv < 0 || len != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to read from stream")); return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN, "cannot receive"))); } if (zc) { if (byteswap) fletcher_4_incremental_byteswap(buf, ilen, zc); else fletcher_4_incremental_native(buf, ilen, zc); } return (0); } static int recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, boolean_t byteswap, zio_cksum_t *zc) { char *buf; int err; buf = zfs_alloc(hdl, len); if (buf == NULL) return (ENOMEM); err = recv_read(hdl, fd, buf, len, byteswap, zc); if (err != 0) { free(buf); return (err); } err = nvlist_unpack(buf, len, nvp, 0); free(buf); if (err != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (malformed nvlist)")); return (EINVAL); } return (0); } static int recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, - int baselen, char *newname, recvflags_t flags) + int baselen, char *newname, recvflags_t *flags) { static int seq; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int err; prop_changelist_t *clp; zfs_handle_t *zhp; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, - flags.force ? MS_FORCE : 0); + flags->force ? MS_FORCE : 0); zfs_close(zhp); if (clp == NULL) return (-1); err = changelist_prefix(clp); if (err) return (err); zc.zc_objset_type = DMU_OST_ZFS; (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); if (tryname) { (void) strcpy(newname, tryname); (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value)); - if (flags.verbose) { + if (flags->verbose) { (void) printf("attempting rename %s to %s\n", zc.zc_name, zc.zc_value); } err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); if (err == 0) changelist_rename(clp, name, tryname); } else { err = ENOENT; } if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) { seq++; (void) strncpy(newname, name, baselen); (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen, "recv-%ld-%u", (long) getpid(), seq); (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); - if (flags.verbose) { + if (flags->verbose) { (void) printf("failed - trying rename %s to %s\n", zc.zc_name, zc.zc_value); } err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); if (err == 0) changelist_rename(clp, name, newname); - if (err && flags.verbose) { + if (err && flags->verbose) { (void) printf("failed (%u) - " "will try again on next pass\n", errno); } err = EAGAIN; - } else if (flags.verbose) { + } else if (flags->verbose) { if (err == 0) (void) printf("success\n"); else (void) printf("failed (%u)\n", errno); } (void) changelist_postfix(clp); changelist_free(clp); return (err); } static int recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, - char *newname, recvflags_t flags) + char *newname, recvflags_t *flags) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int err = 0; prop_changelist_t *clp; zfs_handle_t *zhp; boolean_t defer = B_FALSE; int spa_version; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, - flags.force ? MS_FORCE : 0); + flags->force ? MS_FORCE : 0); if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS) defer = B_TRUE; zfs_close(zhp); if (clp == NULL) return (-1); err = changelist_prefix(clp); if (err) return (err); zc.zc_objset_type = DMU_OST_ZFS; zc.zc_defer_destroy = defer; (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); - if (flags.verbose) + if (flags->verbose) (void) printf("attempting destroy %s\n", zc.zc_name); err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); if (err == 0) { - if (flags.verbose) + if (flags->verbose) (void) printf("success\n"); changelist_remove(clp, zc.zc_name); } (void) changelist_postfix(clp); changelist_free(clp); /* * Deferred destroy might destroy the snapshot or only mark it to be * destroyed later, and it returns success in either case. */ if (err != 0 || (defer && zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT))) { err = recv_rename(hdl, name, NULL, baselen, newname, flags); } return (err); } typedef struct guid_to_name_data { uint64_t guid; char *name; + char *skip; } guid_to_name_data_t; static int guid_to_name_cb(zfs_handle_t *zhp, void *arg) { guid_to_name_data_t *gtnd = arg; int err; + if (gtnd->skip != NULL && + strcmp(zhp->zfs_name, gtnd->skip) == 0) { + return (0); + } + if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { (void) strcpy(gtnd->name, zhp->zfs_name); zfs_close(zhp); return (EEXIST); } + err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); zfs_close(zhp); return (err); } +/* + * Attempt to find the local dataset associated with this guid. In the case of + * multiple matches, we attempt to find the "best" match by searching + * progressively larger portions of the hierarchy. This allows one to send a + * tree of datasets individually and guarantee that we will find the source + * guid within that hierarchy, even if there are multiple matches elsewhere. + */ static int guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, char *name) { /* exhaustive search all local snapshots */ + char pname[ZFS_MAXNAMELEN]; guid_to_name_data_t gtnd; int err = 0; zfs_handle_t *zhp; char *cp; gtnd.guid = guid; gtnd.name = name; + gtnd.skip = NULL; - if (strchr(parent, '@') == NULL) { - zhp = make_dataset_handle(hdl, parent); - if (zhp != NULL) { - err = zfs_iter_children(zhp, guid_to_name_cb, >nd); - zfs_close(zhp); - if (err == EEXIST) - return (0); - } - } + (void) strlcpy(pname, parent, sizeof (pname)); - cp = strchr(parent, '/'); - if (cp) + /* + * Search progressively larger portions of the hierarchy. This will + * select the "most local" version of the origin snapshot in the case + * that there are multiple matching snapshots in the system. + */ + while ((cp = strrchr(pname, '/')) != NULL) { + + /* Chop off the last component and open the parent */ *cp = '\0'; - zhp = make_dataset_handle(hdl, parent); - if (cp) - *cp = '/'; + zhp = make_dataset_handle(hdl, pname); + + if (zhp == NULL) + continue; - if (zhp) { err = zfs_iter_children(zhp, guid_to_name_cb, >nd); zfs_close(zhp); - } + if (err == EEXIST) + return (0); - return (err == EEXIST ? 0 : ENOENT); + /* + * Remember the dataset that we already searched, so we + * skip it next time through. + */ + gtnd.skip = pname; + } + return (ENOENT); } /* - * Return true if dataset guid1 is created before guid2. + * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if + * guid1 is after guid2. */ static int created_before(libzfs_handle_t *hdl, avl_tree_t *avl, uint64_t guid1, uint64_t guid2) { nvlist_t *nvfs; char *fsname, *snapname; char buf[ZFS_MAXNAMELEN]; int rv; - zfs_node_t zn1, zn2; + zfs_handle_t *guid1hdl, *guid2hdl; + uint64_t create1, create2; if (guid2 == 0) return (0); if (guid1 == 0) return (1); nvfs = fsavl_find(avl, guid1, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); - zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); - if (zn1.zn_handle == NULL) + guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); + if (guid1hdl == NULL) return (-1); nvfs = fsavl_find(avl, guid2, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); - zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); - if (zn2.zn_handle == NULL) { - zfs_close(zn2.zn_handle); + guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); + if (guid2hdl == NULL) { + zfs_close(guid1hdl); return (-1); } - rv = (zfs_snapshot_compare(&zn1, &zn2) == -1); + create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG); + create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG); - zfs_close(zn1.zn_handle); - zfs_close(zn2.zn_handle); + if (create1 < create2) + rv = -1; + else if (create1 > create2) + rv = +1; + else + rv = 0; + + zfs_close(guid1hdl); + zfs_close(guid2hdl); return (rv); } static int recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, - recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, + recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, nvlist_t *renamed) { nvlist_t *local_nv; avl_tree_t *local_avl; nvpair_t *fselem, *nextfselem; char *fromsnap; char newname[ZFS_MAXNAMELEN]; int error; boolean_t needagain, progress, recursive; char *s1, *s2; VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); - if (flags.dryrun) + if (flags->dryrun) return (0); again: needagain = progress = B_FALSE; if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, recursive, &local_nv, &local_avl)) != 0) return (error); /* * Process deletes and renames */ for (fselem = nvlist_next_nvpair(local_nv, NULL); fselem; fselem = nextfselem) { nvlist_t *nvfs, *snaps; nvlist_t *stream_nvfs = NULL; nvpair_t *snapelem, *nextsnapelem; uint64_t fromguid = 0; uint64_t originguid = 0; uint64_t stream_originguid = 0; uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid; char *fsname, *stream_fsname; nextfselem = nvlist_next_nvpair(local_nv, fselem); VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap", &parent_fromsnap_guid)); (void) nvlist_lookup_uint64(nvfs, "origin", &originguid); /* * First find the stream's fs, so we can check for * a different origin (due to "zfs promote") */ for (snapelem = nvlist_next_nvpair(snaps, NULL); snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) { uint64_t thisguid; VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); stream_nvfs = fsavl_find(stream_avl, thisguid, NULL); if (stream_nvfs != NULL) break; } /* check for promote */ (void) nvlist_lookup_uint64(stream_nvfs, "origin", &stream_originguid); if (stream_nvfs && originguid != stream_originguid) { switch (created_before(hdl, local_avl, stream_originguid, originguid)) { case 1: { /* promote it! */ zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; nvlist_t *origin_nvfs; char *origin_fsname; - if (flags.verbose) + if (flags->verbose) (void) printf("promoting %s\n", fsname); origin_nvfs = fsavl_find(local_avl, originguid, NULL); VERIFY(0 == nvlist_lookup_string(origin_nvfs, "name", &origin_fsname)); (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); if (error == 0) progress = B_TRUE; break; } default: break; case -1: fsavl_destroy(local_avl); nvlist_free(local_nv); return (-1); } /* * We had/have the wrong origin, therefore our * list of snapshots is wrong. Need to handle * them on the next pass. */ needagain = B_TRUE; continue; } for (snapelem = nvlist_next_nvpair(snaps, NULL); snapelem; snapelem = nextsnapelem) { uint64_t thisguid; char *stream_snapname; nvlist_t *found, *props; nextsnapelem = nvlist_next_nvpair(snaps, snapelem); VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); found = fsavl_find(stream_avl, thisguid, &stream_snapname); /* check for delete */ if (found == NULL) { char name[ZFS_MAXNAMELEN]; - if (!flags.force) + if (!flags->force) continue; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); error = recv_destroy(hdl, name, strlen(fsname)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; continue; } stream_nvfs = found; if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops", &props) && 0 == nvlist_lookup_nvlist(props, stream_snapname, &props)) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; zc.zc_cookie = B_TRUE; /* received */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", fsname, nvpair_name(snapelem)); if (zcmd_write_src_nvlist(hdl, &zc, props) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); zcmd_free_nvlists(&zc); } } /* check for different snapname */ if (strcmp(nvpair_name(snapelem), stream_snapname) != 0) { char name[ZFS_MAXNAMELEN]; char tryname[ZFS_MAXNAMELEN]; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); (void) snprintf(tryname, sizeof (name), "%s@%s", fsname, stream_snapname); error = recv_rename(hdl, name, tryname, strlen(fsname)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; } if (strcmp(stream_snapname, fromsnap) == 0) fromguid = thisguid; } /* check for delete */ if (stream_nvfs == NULL) { - if (!flags.force) + if (!flags->force) continue; error = recv_destroy(hdl, fsname, strlen(tofs)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; continue; } if (fromguid == 0) { - if (flags.verbose) { + if (flags->verbose) { (void) printf("local fs %s does not have " "fromsnap (%s in stream); must have " "been deleted locally; ignoring\n", fsname, fromsnap); } continue; } VERIFY(0 == nvlist_lookup_string(stream_nvfs, "name", &stream_fsname)); VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, "parentfromsnap", &stream_parent_fromsnap_guid)); s1 = strrchr(fsname, '/'); s2 = strrchr(stream_fsname, '/'); /* * Check for rename. If the exact receive path is specified, it * does not count as a rename, but we still need to check the * datasets beneath it. */ if ((stream_parent_fromsnap_guid != 0 && parent_fromsnap_guid != 0 && stream_parent_fromsnap_guid != parent_fromsnap_guid) || - ((flags.isprefix || strcmp(tofs, fsname) != 0) && + ((flags->isprefix || strcmp(tofs, fsname) != 0) && (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { nvlist_t *parent; char tryname[ZFS_MAXNAMELEN]; parent = fsavl_find(local_avl, stream_parent_fromsnap_guid, NULL); /* * NB: parent might not be found if we used the * tosnap for stream_parent_fromsnap_guid, * because the parent is a newly-created fs; * we'll be able to rename it after we recv the * new fs. */ if (parent != NULL) { char *pname; VERIFY(0 == nvlist_lookup_string(parent, "name", &pname)); (void) snprintf(tryname, sizeof (tryname), "%s%s", pname, strrchr(stream_fsname, '/')); } else { tryname[0] = '\0'; - if (flags.verbose) { + if (flags->verbose) { (void) printf("local fs %s new parent " "not found\n", fsname); } } newname[0] = '\0'; error = recv_rename(hdl, fsname, tryname, strlen(tofs)+1, newname, flags); if (renamed != NULL && newname[0] != '\0') { VERIFY(0 == nvlist_add_boolean(renamed, newname)); } if (error) needagain = B_TRUE; else progress = B_TRUE; } } fsavl_destroy(local_avl); nvlist_free(local_nv); if (needagain && progress) { /* do another pass to fix up temporary names */ - if (flags.verbose) + if (flags->verbose) (void) printf("another pass:\n"); goto again; } return (needagain); } static int zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, - recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc, + recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { nvlist_t *stream_nv = NULL; avl_tree_t *stream_avl = NULL; char *fromsnap = NULL; char *cp; char tofs[ZFS_MAXNAMELEN]; char sendfs[ZFS_MAXNAMELEN]; char errbuf[1024]; dmu_replay_record_t drre; int error; boolean_t anyerr = B_FALSE; boolean_t softerr = B_FALSE; boolean_t recursive; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); assert(drr->drr_type == DRR_BEGIN); assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC); assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) == DMU_COMPOUNDSTREAM); /* * Read in the nvlist from the stream. */ if (drr->drr_payloadlen != 0) { error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, - &stream_nv, flags.byteswap, zc); + &stream_nv, flags->byteswap, zc); if (error) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } } recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (recursive && strchr(destname, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot stream")); error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } /* * Read in the end record and verify checksum. */ if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre), - flags.byteswap, NULL))) + flags->byteswap, NULL))) goto out; - if (flags.byteswap) { + if (flags->byteswap) { drre.drr_type = BSWAP_32(drre.drr_type); drre.drr_u.drr_end.drr_checksum.zc_word[0] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]); drre.drr_u.drr_end.drr_checksum.zc_word[1] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]); drre.drr_u.drr_end.drr_checksum.zc_word[2] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]); drre.drr_u.drr_end.drr_checksum.zc_word[3] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]); } if (drre.drr_type != DRR_END) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incorrect header checksum")); error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap); if (drr->drr_payloadlen != 0) { nvlist_t *stream_fss; VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss)); if ((stream_avl = fsavl_create(stream_fss)) == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "couldn't allocate avl tree")); error = zfs_error(hdl, EZFS_NOMEM, errbuf); goto out; } if (fromsnap != NULL) { nvlist_t *renamed = NULL; nvpair_t *pair = NULL; (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN); - if (flags.isprefix) { + if (flags->isprefix) { struct drr_begin *drrb = &drr->drr_u.drr_begin; int i; - if (flags.istail) { + if (flags->istail) { cp = strrchr(drrb->drr_toname, '/'); if (cp == NULL) { (void) strlcat(tofs, "/", ZFS_MAXNAMELEN); i = 0; } else { i = (cp - drrb->drr_toname); } } else { i = strcspn(drrb->drr_toname, "/@"); } /* zfs_receive_one() will create_parents() */ (void) strlcat(tofs, &drrb->drr_toname[i], ZFS_MAXNAMELEN); *strchr(tofs, '@') = '\0'; } - if (recursive && !flags.dryrun && !flags.nomount) { + if (recursive && !flags->dryrun && !flags->nomount) { VERIFY(0 == nvlist_alloc(&renamed, NV_UNIQUE_NAME, 0)); } softerr = recv_incremental_replication(hdl, tofs, flags, stream_nv, stream_avl, renamed); /* Unmount renamed filesystems before receiving. */ while ((pair = nvlist_next_nvpair(renamed, pair)) != NULL) { zfs_handle_t *zhp; prop_changelist_t *clp = NULL; zhp = zfs_open(hdl, nvpair_name(pair), ZFS_TYPE_FILESYSTEM); if (zhp != NULL) { clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, 0); zfs_close(zhp); if (clp != NULL) { softerr |= changelist_prefix(clp); changelist_free(clp); } } } nvlist_free(renamed); } } /* * Get the fs specified by the first path in the stream (the top level * specified by 'zfs send') and pass it to each invocation of * zfs_receive_one(). */ (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); if ((cp = strchr(sendfs, '@')) != NULL) *cp = '\0'; /* Finally, receive each contained stream */ do { /* * we should figure out if it has a recoverable * error, in which case do a recv_skip() and drive on. * Note, if we fail due to already having this guid, * zfs_receive_one() will take care of it (ie, * recv_skip() and return 0). */ error = zfs_receive_impl(hdl, destname, flags, fd, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep); if (error == ENODATA) { error = 0; break; } anyerr |= error; } while (error == 0); if (drr->drr_payloadlen != 0 && fromsnap != NULL) { /* * Now that we have the fs's they sent us, try the * renames again. */ softerr = recv_incremental_replication(hdl, tofs, flags, stream_nv, stream_avl, NULL); } out: fsavl_destroy(stream_avl); if (stream_nv) nvlist_free(stream_nv); if (softerr) error = -2; if (anyerr) error = -1; return (error); } static void trunc_prop_errs(int truncated) { ASSERT(truncated != 0); if (truncated == 1) (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "1 more property could not be set\n")); else (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "%d more properties could not be set\n"), truncated); } static int recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; void *buf = malloc(1<<20); char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive:")); /* XXX would be great to use lseek if possible... */ drr = buf; while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t), byteswap, NULL) == 0) { if (byteswap) drr->drr_type = BSWAP_32(drr->drr_type); switch (drr->drr_type) { case DRR_BEGIN: /* NB: not to be used on v2 stream packages */ if (drr->drr_payloadlen != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid substream header")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } break; case DRR_END: free(buf); return (0); case DRR_OBJECT: if (byteswap) { drr->drr_u.drr_object.drr_bonuslen = BSWAP_32(drr->drr_u.drr_object. drr_bonuslen); } (void) recv_read(hdl, fd, buf, P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), B_FALSE, NULL); break; case DRR_WRITE: if (byteswap) { drr->drr_u.drr_write.drr_length = BSWAP_64(drr->drr_u.drr_write.drr_length); } (void) recv_read(hdl, fd, buf, drr->drr_u.drr_write.drr_length, B_FALSE, NULL); break; case DRR_SPILL: if (byteswap) { drr->drr_u.drr_write.drr_length = BSWAP_64(drr->drr_u.drr_spill.drr_length); } (void) recv_read(hdl, fd, buf, drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); break; case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid record type")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } } free(buf); return (-1); } /* * Restores a backup of tosnap from the file descriptor specified by infd. */ static int zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, - recvflags_t flags, dmu_replay_record_t *drr, + recvflags_t *flags, dmu_replay_record_t *drr, dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; time_t begin_time; int ioctl_err, ioctl_errno, err; char *cp; struct drr_begin *drrb = &drr->drr_u.drr_begin; char errbuf[1024]; char prop_errbuf[1024]; const char *chopprefix; boolean_t newfs = B_FALSE; boolean_t stream_wantsnewfs; uint64_t parent_snapguid = 0; prop_changelist_t *clp = NULL; nvlist_t *snapprops_nvlist = NULL; zprop_errflags_t prop_errflags; boolean_t recursive; begin_time = time(NULL); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (stream_avl != NULL) { char *snapname; nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, &snapname); nvlist_t *props; int ret; (void) nvlist_lookup_uint64(fs, "parentfromsnap", &parent_snapguid); err = nvlist_lookup_nvlist(fs, "props", &props); if (err) VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); - if (flags.canmountoff) { + if (flags->canmountoff) { VERIFY(0 == nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); } ret = zcmd_write_src_nvlist(hdl, &zc, props); if (err) nvlist_free(props); if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) { VERIFY(0 == nvlist_lookup_nvlist(props, snapname, &snapprops_nvlist)); } if (ret != 0) return (-1); } cp = NULL; /* * Determine how much of the snapshot name stored in the stream * we are going to tack on to the name they specified on the * command line, and how much we are going to chop off. * * If they specified a snapshot, chop the entire name stored in * the stream. */ - if (flags.istail) { + if (flags->istail) { /* * A filesystem was specified with -e. We want to tack on only * the tail of the sent snapshot path. */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -e")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } chopprefix = strrchr(sendfs, '/'); if (chopprefix == NULL) { /* * The tail is the poolname, so we need to * prepend a path separator. */ int len = strlen(drrb->drr_toname); cp = malloc(len + 2); cp[0] = '/'; (void) strcpy(&cp[1], drrb->drr_toname); chopprefix = cp; } else { chopprefix = drrb->drr_toname + (chopprefix - sendfs); } - } else if (flags.isprefix) { + } else if (flags->isprefix) { /* * A filesystem was specified with -d. We want to tack on * everything but the first element of the sent snapshot path * (all but the pool name). */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -d")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } chopprefix = strchr(drrb->drr_toname, '/'); if (chopprefix == NULL) chopprefix = strchr(drrb->drr_toname, '@'); } else if (strchr(tosnap, '@') == NULL) { /* * If a filesystem was specified without -d or -e, we want to * tack on everything after the fs specified by 'zfs send'. */ chopprefix = drrb->drr_toname + strlen(sendfs); } else { /* A snapshot was specified as an exact path (no -d or -e). */ if (recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot " "stream")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); } ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname); ASSERT(chopprefix > drrb->drr_toname); ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname)); ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' || chopprefix[0] == '\0'); /* * Determine name of destination snapshot, store in zc_value. */ (void) strcpy(zc.zc_top_ds, tosnap); (void) strcpy(zc.zc_value, tosnap); (void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); free(cp); if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* * Determine the name of the origin snapshot, store in zc_string. */ if (drrb->drr_flags & DRR_FLAG_CLONE) { - if (guid_to_name(hdl, tosnap, + if (guid_to_name(hdl, zc.zc_value, drrb->drr_fromguid, zc.zc_string) != 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "local origin for clone %s does not exist"), zc.zc_value); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } - if (flags.verbose) + if (flags->verbose) (void) printf("found clone origin %s\n", zc.zc_string); } stream_wantsnewfs = (drrb->drr_fromguid == 0 || (drrb->drr_flags & DRR_FLAG_CLONE)); if (stream_wantsnewfs) { /* * if the parent fs does not exist, look for it based on * the parent snap GUID */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive new filesystem stream")); (void) strcpy(zc.zc_name, zc.zc_value); cp = strrchr(zc.zc_name, '/'); if (cp) *cp = '\0'; if (cp && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char suffix[ZFS_MAXNAMELEN]; (void) strcpy(suffix, strrchr(zc.zc_value, '/')); - if (guid_to_name(hdl, tosnap, parent_snapguid, + if (guid_to_name(hdl, zc.zc_name, parent_snapguid, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, suffix); } } } else { /* * if the fs does not exist, look for it based on the * fromsnap GUID */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive incremental stream")); (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; /* * If the exact receive path was specified and this is the * topmost path in the stream, then if the fs does not exist we * should look no further. */ - if ((flags.isprefix || (*(chopprefix = drrb->drr_toname + + if ((flags->isprefix || (*(chopprefix = drrb->drr_toname + strlen(sendfs)) != '\0' && *chopprefix != '@')) && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char snap[ZFS_MAXNAMELEN]; (void) strcpy(snap, strchr(zc.zc_value, '@')); - if (guid_to_name(hdl, tosnap, drrb->drr_fromguid, + if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, snap); } } } (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { zfs_handle_t *zhp; /* * Destination fs exists. Therefore this should either * be an incremental, or the stream specifies a new fs * (full stream or clone) and they want us to blow it * away (and have therefore specified -F and removed any * snapshots). */ if (stream_wantsnewfs) { - if (!flags.force) { + if (!flags->force) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' exists\n" "must specify -F to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination has snapshots (eg. %s)\n" "must destroy them to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } } if ((zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { zcmd_free_nvlists(&zc); return (-1); } if (stream_wantsnewfs && zhp->zfs_dmustats.dds_origin[0]) { zcmd_free_nvlists(&zc); zfs_close(zhp); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' is a clone\n" "must destroy it to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } - if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && + if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && stream_wantsnewfs) { /* We can't do online recv in this case */ clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0); if (clp == NULL) { zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } if (changelist_prefix(clp) != 0) { changelist_free(clp); zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } } - if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME && + if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME && zvol_remove_link(hdl, zhp->zfs_name) != 0) { zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } zfs_close(zhp); } else { /* * Destination filesystem does not exist. Therefore we better * be creating a new filesystem (either from a full backup, or * a clone). It would therefore be invalid if the user * specified only the pool name (i.e. if the destination name * contained no slash character). */ if (!stream_wantsnewfs || (cp = strrchr(zc.zc_name, '/')) == NULL) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' does not exist"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* * Trim off the final dataset component so we perform the * recvbackup ioctl to the filesystems's parent. */ *cp = '\0'; - if (flags.isprefix && !flags.istail && !flags.dryrun && + if (flags->isprefix && !flags->istail && !flags->dryrun && create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); } newfs = B_TRUE; } zc.zc_begin_record = drr_noswap->drr_u.drr_begin; zc.zc_cookie = infd; - zc.zc_guid = flags.force; - if (flags.verbose) { + zc.zc_guid = flags->force; + if (flags->verbose) { (void) printf("%s %s stream of %s into %s\n", - flags.dryrun ? "would receive" : "receiving", + flags->dryrun ? "would receive" : "receiving", drrb->drr_fromguid ? "incremental" : "full", drrb->drr_toname, zc.zc_value); (void) fflush(stdout); } - if (flags.dryrun) { + if (flags->dryrun) { zcmd_free_nvlists(&zc); - return (recv_skip(hdl, infd, flags.byteswap)); + return (recv_skip(hdl, infd, flags->byteswap)); } zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; zc.zc_nvlist_dst_size = sizeof (prop_errbuf); zc.zc_cleanup_fd = cleanup_fd; zc.zc_action_handle = *action_handlep; err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); ioctl_errno = errno; prop_errflags = (zprop_errflags_t)zc.zc_obj; if (err == 0) { nvlist_t *prop_errors; VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, zc.zc_nvlist_dst_size, &prop_errors, 0)); nvpair_t *prop_err = NULL; while ((prop_err = nvlist_next_nvpair(prop_errors, prop_err)) != NULL) { char tbuf[1024]; zfs_prop_t prop; int intval; prop = zfs_name_to_prop(nvpair_name(prop_err)); (void) nvpair_value_int32(prop_err, &intval); if (strcmp(nvpair_name(prop_err), ZPROP_N_MORE_ERRORS) == 0) { trunc_prop_errs(intval); break; } else { (void) snprintf(tbuf, sizeof (tbuf), dgettext(TEXT_DOMAIN, "cannot receive %s property on %s"), nvpair_name(prop_err), zc.zc_name); zfs_setprop_error(hdl, prop, intval, tbuf); } } nvlist_free(prop_errors); } zc.zc_nvlist_dst = 0; zc.zc_nvlist_dst_size = 0; zcmd_free_nvlists(&zc); if (err == 0 && snapprops_nvlist) { zfs_cmd_t zc2 = { "\0", "\0", "\0", "\0", 0 }; (void) strcpy(zc2.zc_name, zc.zc_value); zc2.zc_cookie = B_TRUE; /* received */ if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2); zcmd_free_nvlists(&zc2); } } if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) { /* * It may be that this snapshot already exists, * in which case we want to consume & ignore it * rather than failing. */ avl_tree_t *local_avl; nvlist_t *local_nv, *fs; cp = strchr(zc.zc_value, '@'); /* * XXX Do this faster by just iterating over snaps in * this fs. Also if zc_value does not exist, we will * get a strange "does not exist" error message. */ *cp = '\0'; if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); fsavl_destroy(local_avl); nvlist_free(local_nv); if (fs != NULL) { - if (flags.verbose) { + if (flags->verbose) { (void) printf("snap %s already exists; " "ignoring\n", zc.zc_value); } err = ioctl_err = recv_skip(hdl, infd, - flags.byteswap); + flags->byteswap); } } *cp = '@'; } if (ioctl_err != 0) { switch (ioctl_errno) { case ENODEV: cp = strchr(zc.zc_value, '@'); *cp = '\0'; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "most recent snapshot of %s does not\n" "match incremental source"), zc.zc_value); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); *cp = '@'; break; case ETXTBSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s has been modified\n" "since most recent snapshot"), zc.zc_name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; case EEXIST: cp = strchr(zc.zc_value, '@'); if (newfs) { /* it's the containing fs that exists */ *cp = '\0'; } zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination already exists")); (void) zfs_error_fmt(hdl, EZFS_EXISTS, dgettext(TEXT_DOMAIN, "cannot restore to %s"), zc.zc_value); *cp = '@'; break; case EINVAL: (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid stream (checksum mismatch)")); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to receive this stream.")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EDQUOT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s space quota exceeded"), zc.zc_name); - (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); + (void) zfs_error(hdl, EZFS_NOSPC, errbuf); break; default: (void) zfs_standard_error(hdl, ioctl_errno, errbuf); } } /* * Mount the target filesystem (if created). Also mount any * children of the target filesystem if we did a replication * receive (indicated by stream_avl being non-NULL). */ cp = strchr(zc.zc_value, '@'); if (cp && (ioctl_err == 0 || !newfs)) { zfs_handle_t *h; *cp = '\0'; h = zfs_open(hdl, zc.zc_value, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (h != NULL) { if (h->zfs_type == ZFS_TYPE_VOLUME) { *cp = '@'; err = zvol_create_link(hdl, h->zfs_name); if (err == 0 && ioctl_err == 0) err = zvol_create_link(hdl, zc.zc_value); } else if (newfs || stream_avl) { /* * Track the first/top of hierarchy fs, * for mounting and sharing later. */ if (top_zfs && *top_zfs == NULL) *top_zfs = zfs_strdup(hdl, zc.zc_value); } zfs_close(h); } *cp = '@'; } if (clp) { err |= changelist_postfix(clp); changelist_free(clp); } if (prop_errflags & ZPROP_ERR_NOCLEAR) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " "failed to clear unreceived properties on %s"), zc.zc_name); (void) fprintf(stderr, "\n"); } if (prop_errflags & ZPROP_ERR_NORESTORE) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " "failed to restore original properties on %s"), zc.zc_name); (void) fprintf(stderr, "\n"); } if (err || ioctl_err) return (-1); *action_handlep = zc.zc_action_handle; - if (flags.verbose) { + if (flags->verbose) { char buf1[64]; char buf2[64]; uint64_t bytes = zc.zc_cookie; time_t delta = time(NULL) - begin_time; if (delta == 0) delta = 1; zfs_nicenum(bytes, buf1, sizeof (buf1)); zfs_nicenum(bytes/delta, buf2, sizeof (buf1)); (void) printf("received %sB stream in %lu seconds (%sB/sec)\n", buf1, delta, buf2); } return (0); } static int -zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, +zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { int err; dmu_replay_record_t drr, drr_noswap; struct drr_begin *drrb = &drr.drr_u.drr_begin; char errbuf[1024]; zio_cksum_t zcksum = { { 0 } }; uint64_t featureflags; int hdrtype; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); - if (flags.isprefix && + if (flags->isprefix && !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs " "(%s) does not exist"), tosnap); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* read in the BEGIN record */ if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, &zcksum))) return (err); if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) { /* It's the double end record at the end of a package */ return (ENODATA); } /* the kernel needs the non-byteswapped begin record */ drr_noswap = drr; - flags.byteswap = B_FALSE; + flags->byteswap = B_FALSE; if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { /* * We computed the checksum in the wrong byteorder in * recv_read() above; do it again correctly. */ bzero(&zcksum, sizeof (zio_cksum_t)); fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum); - flags.byteswap = B_TRUE; + flags->byteswap = B_TRUE; drr.drr_type = BSWAP_32(drr.drr_type); drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); drrb->drr_magic = BSWAP_64(drrb->drr_magic); drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); drrb->drr_type = BSWAP_32(drrb->drr_type); drrb->drr_flags = BSWAP_32(drrb->drr_flags); drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad magic number)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo); if (!DMU_STREAM_SUPPORTED(featureflags) || (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "stream has unsupported feature, feature flags = %lx"), featureflags); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (strchr(drrb->drr_toname, '@') == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad snapshot name)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) { char nonpackage_sendfs[ZFS_MAXNAMELEN]; if (sendfs == NULL) { /* * We were not called from zfs_receive_package(). Get * the fs specified by 'zfs send'. */ char *cp; (void) strlcpy(nonpackage_sendfs, drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) *cp = '\0'; sendfs = nonpackage_sendfs; } return (zfs_receive_one(hdl, infd, tosnap, flags, &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep)); } else { assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM); return (zfs_receive_package(hdl, infd, tosnap, flags, &drr, &zcksum, top_zfs, cleanup_fd, action_handlep)); } } /* * Restores a backup of tosnap from the file descriptor specified by infd. * Return 0 on total success, -2 if some things couldn't be * destroyed/renamed/promoted, -1 if some things couldn't be received. * (-1 will override -2). */ int -zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, +zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, int infd, avl_tree_t *stream_avl) { char *top_zfs = NULL; int err; int cleanup_fd; uint64_t action_handle = 0; cleanup_fd = open(ZFS_DEV, O_RDWR); VERIFY(cleanup_fd >= 0); err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL, stream_avl, &top_zfs, cleanup_fd, &action_handle); VERIFY(0 == close(cleanup_fd)); - if (err == 0 && !flags.nomount && top_zfs) { + if (err == 0 && !flags->nomount && top_zfs) { zfs_handle_t *zhp; prop_changelist_t *clp; zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM); if (zhp != NULL) { clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, CL_GATHER_MOUNT_ALWAYS, 0); zfs_close(zhp); if (clp != NULL) { /* mount and share received datasets */ err = changelist_postfix(clp); changelist_free(clp); } } if (zhp == NULL || clp == NULL || err) err = -1; } if (top_zfs) free(top_zfs); return (err); } diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 5618a5cd307f..de4bb72de4f1 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1,1621 +1,1624 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* * Internal utility routines for the ZFS library. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libzfs_impl.h" #include "zfs_prop.h" int libzfs_errno(libzfs_handle_t *hdl) { return (hdl->libzfs_error); } const char * libzfs_error_action(libzfs_handle_t *hdl) { return (hdl->libzfs_action); } const char * libzfs_error_description(libzfs_handle_t *hdl) { if (hdl->libzfs_desc[0] != '\0') return (hdl->libzfs_desc); switch (hdl->libzfs_error) { case EZFS_NOMEM: return (dgettext(TEXT_DOMAIN, "out of memory")); case EZFS_BADPROP: return (dgettext(TEXT_DOMAIN, "invalid property value")); case EZFS_PROPREADONLY: return (dgettext(TEXT_DOMAIN, "read-only property")); case EZFS_PROPTYPE: return (dgettext(TEXT_DOMAIN, "property doesn't apply to " "datasets of this type")); case EZFS_PROPNONINHERIT: return (dgettext(TEXT_DOMAIN, "property cannot be inherited")); case EZFS_PROPSPACE: return (dgettext(TEXT_DOMAIN, "invalid quota or reservation")); case EZFS_BADTYPE: return (dgettext(TEXT_DOMAIN, "operation not applicable to " "datasets of this type")); case EZFS_BUSY: return (dgettext(TEXT_DOMAIN, "pool or dataset is busy")); case EZFS_EXISTS: return (dgettext(TEXT_DOMAIN, "pool or dataset exists")); case EZFS_NOENT: return (dgettext(TEXT_DOMAIN, "no such pool or dataset")); case EZFS_BADSTREAM: return (dgettext(TEXT_DOMAIN, "invalid backup stream")); case EZFS_DSREADONLY: return (dgettext(TEXT_DOMAIN, "dataset is read-only")); case EZFS_VOLTOOBIG: return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for " "this system")); case EZFS_INVALIDNAME: return (dgettext(TEXT_DOMAIN, "invalid name")); case EZFS_BADRESTORE: return (dgettext(TEXT_DOMAIN, "unable to restore to " "destination")); case EZFS_BADBACKUP: return (dgettext(TEXT_DOMAIN, "backup failed")); case EZFS_BADTARGET: return (dgettext(TEXT_DOMAIN, "invalid target vdev")); case EZFS_NODEVICE: return (dgettext(TEXT_DOMAIN, "no such device in pool")); case EZFS_BADDEV: return (dgettext(TEXT_DOMAIN, "invalid device")); case EZFS_NOREPLICAS: return (dgettext(TEXT_DOMAIN, "no valid replicas")); case EZFS_RESILVERING: return (dgettext(TEXT_DOMAIN, "currently resilvering")); case EZFS_BADVERSION: return (dgettext(TEXT_DOMAIN, "unsupported version")); case EZFS_POOLUNAVAIL: return (dgettext(TEXT_DOMAIN, "pool is unavailable")); case EZFS_DEVOVERFLOW: return (dgettext(TEXT_DOMAIN, "too many devices in one vdev")); case EZFS_BADPATH: return (dgettext(TEXT_DOMAIN, "must be an absolute path")); case EZFS_CROSSTARGET: return (dgettext(TEXT_DOMAIN, "operation crosses datasets or " "pools")); case EZFS_ZONED: return (dgettext(TEXT_DOMAIN, "dataset in use by local zone")); case EZFS_MOUNTFAILED: return (dgettext(TEXT_DOMAIN, "mount failed")); case EZFS_UMOUNTFAILED: return (dgettext(TEXT_DOMAIN, "umount failed")); case EZFS_UNSHARENFSFAILED: return (dgettext(TEXT_DOMAIN, "unshare(1M) failed")); case EZFS_SHARENFSFAILED: return (dgettext(TEXT_DOMAIN, "share(1M) failed")); case EZFS_UNSHARESMBFAILED: return (dgettext(TEXT_DOMAIN, "smb remove share failed")); case EZFS_SHARESMBFAILED: return (dgettext(TEXT_DOMAIN, "smb add share failed")); case EZFS_PERM: return (dgettext(TEXT_DOMAIN, "permission denied")); case EZFS_NOSPC: return (dgettext(TEXT_DOMAIN, "out of space")); case EZFS_FAULT: return (dgettext(TEXT_DOMAIN, "bad address")); case EZFS_IO: return (dgettext(TEXT_DOMAIN, "I/O error")); case EZFS_INTR: return (dgettext(TEXT_DOMAIN, "signal received")); case EZFS_ISSPARE: return (dgettext(TEXT_DOMAIN, "device is reserved as a hot " "spare")); case EZFS_INVALCONFIG: return (dgettext(TEXT_DOMAIN, "invalid vdev configuration")); case EZFS_RECURSIVE: return (dgettext(TEXT_DOMAIN, "recursive dataset dependency")); case EZFS_NOHISTORY: return (dgettext(TEXT_DOMAIN, "no history available")); case EZFS_POOLPROPS: return (dgettext(TEXT_DOMAIN, "failed to retrieve " "pool properties")); case EZFS_POOL_NOTSUP: return (dgettext(TEXT_DOMAIN, "operation not supported " "on this type of pool")); case EZFS_POOL_INVALARG: return (dgettext(TEXT_DOMAIN, "invalid argument for " "this pool operation")); case EZFS_NAMETOOLONG: return (dgettext(TEXT_DOMAIN, "dataset name is too long")); case EZFS_OPENFAILED: return (dgettext(TEXT_DOMAIN, "open failed")); case EZFS_NOCAP: return (dgettext(TEXT_DOMAIN, "disk capacity information could not be retrieved")); case EZFS_LABELFAILED: return (dgettext(TEXT_DOMAIN, "write of label failed")); case EZFS_BADWHO: return (dgettext(TEXT_DOMAIN, "invalid user/group")); case EZFS_BADPERM: return (dgettext(TEXT_DOMAIN, "invalid permission")); case EZFS_BADPERMSET: return (dgettext(TEXT_DOMAIN, "invalid permission set name")); case EZFS_NODELEGATION: return (dgettext(TEXT_DOMAIN, "delegated administration is " "disabled on pool")); case EZFS_BADCACHE: return (dgettext(TEXT_DOMAIN, "invalid or missing cache file")); case EZFS_ISL2CACHE: return (dgettext(TEXT_DOMAIN, "device is in use as a cache")); case EZFS_VDEVNOTSUP: return (dgettext(TEXT_DOMAIN, "vdev specification is not " "supported")); case EZFS_NOTSUP: return (dgettext(TEXT_DOMAIN, "operation not supported " "on this dataset")); case EZFS_ACTIVE_SPARE: return (dgettext(TEXT_DOMAIN, "pool has active shared spare " "device")); case EZFS_UNPLAYED_LOGS: return (dgettext(TEXT_DOMAIN, "log device has unplayed intent " "logs")); case EZFS_REFTAG_RELE: return (dgettext(TEXT_DOMAIN, "no such tag on this dataset")); case EZFS_REFTAG_HOLD: return (dgettext(TEXT_DOMAIN, "tag already exists on this " "dataset")); case EZFS_TAGTOOLONG: return (dgettext(TEXT_DOMAIN, "tag too long")); case EZFS_PIPEFAILED: return (dgettext(TEXT_DOMAIN, "pipe create failed")); case EZFS_THREADCREATEFAILED: return (dgettext(TEXT_DOMAIN, "thread create failed")); case EZFS_POSTSPLIT_ONLINE: return (dgettext(TEXT_DOMAIN, "disk was split from this pool " "into a new one")); case EZFS_SCRUBBING: return (dgettext(TEXT_DOMAIN, "currently scrubbing; " "use 'zpool scrub -s' to cancel current scrub")); case EZFS_NO_SCRUB: return (dgettext(TEXT_DOMAIN, "there is no active scrub")); case EZFS_DIFF: return (dgettext(TEXT_DOMAIN, "unable to generate diffs")); case EZFS_DIFFDATA: return (dgettext(TEXT_DOMAIN, "invalid diff data")); case EZFS_POOLREADONLY: return (dgettext(TEXT_DOMAIN, "pool is read-only")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: assert(hdl->libzfs_error == 0); return (dgettext(TEXT_DOMAIN, "no error")); } } /*PRINTFLIKE2*/ void zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...) { va_list ap; va_start(ap, fmt); (void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc), fmt, ap); hdl->libzfs_desc_active = 1; va_end(ap); } static void zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap) { (void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action), fmt, ap); hdl->libzfs_error = error; if (hdl->libzfs_desc_active) hdl->libzfs_desc_active = 0; else hdl->libzfs_desc[0] = '\0'; if (hdl->libzfs_printerr) { if (error == EZFS_UNKNOWN) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal " "error: %s\n"), libzfs_error_description(hdl)); abort(); } (void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action, libzfs_error_description(hdl)); if (error == EZFS_NOMEM) exit(1); } } int zfs_error(libzfs_handle_t *hdl, int error, const char *msg) { return (zfs_error_fmt(hdl, error, "%s", msg)); } /*PRINTFLIKE3*/ int zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) { va_list ap; va_start(ap, fmt); zfs_verror(hdl, error, fmt, ap); va_end(ap); return (-1); } static int zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap) { switch (error) { case EPERM: case EACCES: zfs_verror(hdl, EZFS_PERM, fmt, ap); return (-1); case ECANCELED: zfs_verror(hdl, EZFS_NODELEGATION, fmt, ap); return (-1); case EIO: zfs_verror(hdl, EZFS_IO, fmt, ap); return (-1); case EFAULT: zfs_verror(hdl, EZFS_FAULT, fmt, ap); return (-1); case EINTR: zfs_verror(hdl, EZFS_INTR, fmt, ap); return (-1); } return (0); } int zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg) { return (zfs_standard_error_fmt(hdl, error, "%s", msg)); } /*PRINTFLIKE3*/ int zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (zfs_common_error(hdl, error, fmt, ap) != 0) { va_end(ap); return (-1); } switch (error) { case ENXIO: case ENODEV: + case EPIPE: zfs_verror(hdl, EZFS_IO, fmt, ap); break; case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset does not exist")); zfs_verror(hdl, EZFS_NOENT, fmt, ap); break; case ENOSPC: case EDQUOT: zfs_verror(hdl, EZFS_NOSPC, fmt, ap); return (-1); case EEXIST: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset already exists")); zfs_verror(hdl, EZFS_EXISTS, fmt, ap); break; case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is busy")); zfs_verror(hdl, EZFS_BUSY, fmt, ap); break; case EROFS: zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap); break; case ENAMETOOLONG: zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap); break; case ENOTSUP: zfs_verror(hdl, EZFS_BADVERSION, fmt, ap); break; case EAGAIN: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool I/O is currently suspended")); zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); break; default: zfs_error_aux(hdl, strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); break; } va_end(ap); return (-1); } int zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg) { return (zpool_standard_error_fmt(hdl, error, "%s", msg)); } /*PRINTFLIKE3*/ int zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (zfs_common_error(hdl, error, fmt, ap) != 0) { va_end(ap); return (-1); } switch (error) { case ENODEV: zfs_verror(hdl, EZFS_NODEVICE, fmt, ap); break; case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool or dataset")); zfs_verror(hdl, EZFS_NOENT, fmt, ap); break; case EEXIST: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool already exists")); zfs_verror(hdl, EZFS_EXISTS, fmt, ap); break; case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy")); zfs_verror(hdl, EZFS_BUSY, fmt, ap); break; case ENXIO: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is currently unavailable")); zfs_verror(hdl, EZFS_BADDEV, fmt, ap); break; case ENAMETOOLONG: zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap); break; case ENOTSUP: zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap); break; case EINVAL: zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap); break; case ENOSPC: case EDQUOT: zfs_verror(hdl, EZFS_NOSPC, fmt, ap); return (-1); case EAGAIN: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool I/O is currently suspended")); zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); break; case EROFS: zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap); break; default: zfs_error_aux(hdl, strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); } va_end(ap); return (-1); } /* * Display an out of memory error message and abort the current program. */ int no_memory(libzfs_handle_t *hdl) { return (zfs_error(hdl, EZFS_NOMEM, "internal error")); } /* * A safe form of malloc() which will die if the allocation fails. */ void * zfs_alloc(libzfs_handle_t *hdl, size_t size) { void *data; if ((data = calloc(1, size)) == NULL) (void) no_memory(hdl); return (data); } /* * A safe form of asprintf() which will die if the allocation fails. */ /*PRINTFLIKE2*/ char * zfs_asprintf(libzfs_handle_t *hdl, const char *fmt, ...) { va_list ap; char *ret; int err; va_start(ap, fmt); err = vasprintf(&ret, fmt, ap); va_end(ap); if (err < 0) (void) no_memory(hdl); return (ret); } /* * A safe form of realloc(), which also zeroes newly allocated space. */ void * zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize) { void *ret; if ((ret = realloc(ptr, newsize)) == NULL) { (void) no_memory(hdl); return (NULL); } bzero((char *)ret + oldsize, (newsize - oldsize)); return (ret); } /* * A safe form of strdup() which will die if the allocation fails. */ char * zfs_strdup(libzfs_handle_t *hdl, const char *str) { char *ret; if ((ret = strdup(str)) == NULL) (void) no_memory(hdl); return (ret); } /* * Convert a number to an appropriately human-readable output. */ void zfs_nicenum(uint64_t num, char *buf, size_t buflen) { uint64_t n = num; int index = 0; char u; while (n >= 1024) { n /= 1024; index++; } u = " KMGTPE"[index]; if (index == 0) { (void) snprintf(buf, buflen, "%llu", (u_longlong_t) n); } else if ((num & ((1ULL << 10 * index) - 1)) == 0) { /* * If this is an even multiple of the base, always display * without any decimal precision. */ (void) snprintf(buf, buflen, "%llu%c", (u_longlong_t) n, u); } else { /* * We want to choose a precision that reflects the best choice * for fitting in 5 characters. This can get rather tricky when * we have numbers that are very close to an order of magnitude. * For example, when displaying 10239 (which is really 9.999K), * we want only a single place of precision for 10.0K. We could * develop some complex heuristics for this, but it's much * easier just to try each combination in turn. */ int i; for (i = 2; i >= 0; i--) { if (snprintf(buf, buflen, "%.*f%c", i, (double)num / (1ULL << 10 * index), u) <= 5) break; } } } void libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr) { hdl->libzfs_printerr = printerr; } static int libzfs_module_loaded(const char *module) { const char path_prefix[] = "/sys/module/"; char path[256]; memcpy(path, path_prefix, sizeof(path_prefix) - 1); strcpy(path + sizeof(path_prefix) - 1, module); return (access(path, F_OK) == 0); } int libzfs_run_process(const char *path, char *argv[], int flags) { pid_t pid; int rc, devnull_fd; pid = vfork(); if (pid == 0) { devnull_fd = open("/dev/null", O_WRONLY); if (devnull_fd < 0) _exit(-1); if (!(flags & STDOUT_VERBOSE)) (void) dup2(devnull_fd, STDOUT_FILENO); if (!(flags & STDERR_VERBOSE)) (void) dup2(devnull_fd, STDERR_FILENO); close(devnull_fd); (void) execvp(path, argv); _exit(-1); } else if (pid > 0) { int status; while ((rc = waitpid(pid, &status, 0)) == -1 && errno == EINTR); if (rc < 0 || !WIFEXITED(status)) return -1; return WEXITSTATUS(status); } return -1; } int libzfs_load_module(const char *module) { char *argv[4] = {"/sbin/modprobe", "-q", (char *)module, (char *)0}; if (libzfs_module_loaded(module)) return 0; return libzfs_run_process("/sbin/modprobe", argv, 0); } libzfs_handle_t * libzfs_init(void) { libzfs_handle_t *hdl; if (libzfs_load_module("zfs") != 0) { (void) fprintf(stderr, gettext("Failed to load ZFS module " "stack.\nLoad the module manually by running " "'insmod /zfs.ko' as root.\n")); return (NULL); } if ((hdl = calloc(1, sizeof (libzfs_handle_t))) == NULL) { return (NULL); } if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { (void) fprintf(stderr, gettext("Unable to open %s: %s.\n"), ZFS_DEV, strerror(errno)); if (errno == ENOENT) (void) fprintf(stderr, gettext("Verify the ZFS module stack is " "loaded by running '/sbin/modprobe zfs'.\n")); free(hdl); return (NULL); } #ifdef HAVE_SETMNTENT if ((hdl->libzfs_mnttab = setmntent(MNTTAB, "r")) == NULL) { #else if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) { #endif (void) close(hdl->libzfs_fd); free(hdl); return (NULL); } hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r"); zfs_prop_init(); zpool_prop_init(); libzfs_mnttab_init(hdl); return (hdl); } void libzfs_fini(libzfs_handle_t *hdl) { (void) close(hdl->libzfs_fd); if (hdl->libzfs_mnttab) #ifdef HAVE_SETMNTENT (void) endmntent(hdl->libzfs_mnttab); #else (void) fclose(hdl->libzfs_mnttab); #endif if (hdl->libzfs_sharetab) (void) fclose(hdl->libzfs_sharetab); zfs_uninit_libshare(hdl); if (hdl->libzfs_log_str) (void) free(hdl->libzfs_log_str); zpool_free_handles(hdl); libzfs_fru_clear(hdl, B_TRUE); namespace_clear(hdl); libzfs_mnttab_fini(hdl); free(hdl); } libzfs_handle_t * zpool_get_handle(zpool_handle_t *zhp) { return (zhp->zpool_hdl); } libzfs_handle_t * zfs_get_handle(zfs_handle_t *zhp) { return (zhp->zfs_hdl); } zpool_handle_t * zfs_get_pool_handle(const zfs_handle_t *zhp) { return (zhp->zpool_hdl); } /* * Given a name, determine whether or not it's a valid path * (starts with '/' or "./"). If so, walk the mnttab trying * to match the device number. If not, treat the path as an * fs/vol/snap name. */ zfs_handle_t * zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype) { struct stat64 statbuf; struct extmnttab entry; int ret; if (path[0] != '/' && strncmp(path, "./", strlen("./")) != 0) { /* * It's not a valid path, assume it's a name of type 'argtype'. */ return (zfs_open(hdl, path, argtype)); } if (stat64(path, &statbuf) != 0) { (void) fprintf(stderr, "%s: %s\n", path, strerror(errno)); return (NULL); } rewind(hdl->libzfs_mnttab); while ((ret = getextmntent(hdl->libzfs_mnttab, &entry, 0)) == 0) { if (makedevice(entry.mnt_major, entry.mnt_minor) == statbuf.st_dev) { break; } } if (ret != 0) { return (NULL); } if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { (void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"), path); return (NULL); } return (zfs_open(hdl, entry.mnt_special, ZFS_TYPE_FILESYSTEM)); } /* * Given a shorthand device name, check if a file by that name exists in a list * of directories under /dev. If one is found, store its full path in the * buffer pointed to by the path argument and return 0, else return -1. The * path buffer must be allocated by the caller. */ int zfs_resolve_shortname(const char *name, char *path, size_t pathlen) { int i, err; char dirs[6][9] = {"by-id", "by-label", "by-path", "by-uuid", "zpool", "by-vdev"}; /* /dev/ */ (void) snprintf(path, pathlen, "%s/%s", DISK_ROOT, name); err = access(path, F_OK); if (err == 0) return (err); /* /dev/mapper/ */ (void) snprintf(path, pathlen, "%s/mapper/%s", DISK_ROOT, name); err = access(path, F_OK); if (err == 0) return (err); /* /dev/disk// */ for (i = 0; i < 6 && err < 0; i++) { (void) snprintf(path, pathlen, "%s/%s/%s", UDISK_ROOT, dirs[i], name); err = access(path, F_OK); } return (err); } /* * Append partition suffix to a device path. This should be used to generate * the name of a whole disk as it is stored in the vdev label. The * user-visible names of whole disks do not contain the partition information. * Modifies buf which must be allocated by the caller. */ void zfs_append_partition(const char *path, char *buf, size_t buflen) { if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) (void) snprintf(buf, buflen, "%s%s%s", path, "-part", FIRST_SLICE); else (void) snprintf(buf, buflen, "%s%s%s", path, isdigit(path[strlen(path)-1]) ? "p" : "", FIRST_SLICE); } /* * Initialize the zc_nvlist_dst member to prepare for receiving an nvlist from * an ioctl(). */ int zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len) { if (len == 0) len = 16 * 1024; zc->zc_nvlist_dst_size = len; if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t) zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == 0) return (-1); return (0); } /* * Called when an ioctl() which returns an nvlist fails with ENOMEM. This will * expand the nvlist to the size specified in 'zc_nvlist_dst_size', which was * filled in by the kernel to indicate the actual required size. */ int zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc) { free((void *)(uintptr_t)zc->zc_nvlist_dst); if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t) zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == 0) return (-1); return (0); } /* * Called to free the src and dst nvlists stored in the command structure. */ void zcmd_free_nvlists(zfs_cmd_t *zc) { free((void *)(uintptr_t)zc->zc_nvlist_conf); free((void *)(uintptr_t)zc->zc_nvlist_src); free((void *)(uintptr_t)zc->zc_nvlist_dst); } static int zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen, nvlist_t *nvl) { char *packed; size_t len; verify(nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0); if ((packed = zfs_alloc(hdl, len)) == NULL) return (-1); verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0); *outnv = (uint64_t)(uintptr_t)packed; *outlen = len; return (0); } int zcmd_write_conf_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl) { return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_conf, &zc->zc_nvlist_conf_size, nvl)); } int zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl) { return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_src, &zc->zc_nvlist_src_size, nvl)); } /* * Unpacks an nvlist from the ZFS ioctl command structure. */ int zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp) { if (nvlist_unpack((void *)(uintptr_t)zc->zc_nvlist_dst, zc->zc_nvlist_dst_size, nvlp, 0) != 0) return (no_memory(hdl)); return (0); } int zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc) { int error; zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str; error = ioctl(hdl->libzfs_fd, request, zc); if (hdl->libzfs_log_str) { free(hdl->libzfs_log_str); hdl->libzfs_log_str = NULL; } zc->zc_history = 0; return (error); } /* * ================================================================ * API shared by zfs and zpool property management * ================================================================ */ static void zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) { zprop_list_t *pl = cbp->cb_proplist; int i; char *title; size_t len; cbp->cb_first = B_FALSE; if (cbp->cb_scripted) return; /* * Start with the length of the column headers. */ cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME")); cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN, "PROPERTY")); cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN, "VALUE")); cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN, "RECEIVED")); cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN, "SOURCE")); /* first property is always NAME */ assert(cbp->cb_proplist->pl_prop == ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME)); /* * Go through and calculate the widths for each column. For the * 'source' column, we kludge it up by taking the worst-case scenario of * inheriting from the longest name. This is acceptable because in the * majority of cases 'SOURCE' is the last column displayed, and we don't * use the width anyway. Note that the 'VALUE' column can be oversized, * if the name of the property is much longer than any values we find. */ for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { /* * 'PROPERTY' column */ if (pl->pl_prop != ZPROP_INVAL) { const char *propname = (type == ZFS_TYPE_POOL) ? zpool_prop_to_name(pl->pl_prop) : zfs_prop_to_name(pl->pl_prop); len = strlen(propname); if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) cbp->cb_colwidths[GET_COL_PROPERTY] = len; } else { len = strlen(pl->pl_user_prop); if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) cbp->cb_colwidths[GET_COL_PROPERTY] = len; } /* * 'VALUE' column. The first property is always the 'name' * property that was tacked on either by /sbin/zfs's * zfs_do_get() or when calling zprop_expand_list(), so we * ignore its width. If the user specified the name property * to display, then it will be later in the list in any case. */ if (pl != cbp->cb_proplist && pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE]) cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width; /* 'RECEIVED' column. */ if (pl != cbp->cb_proplist && pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD]) cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width; /* * 'NAME' and 'SOURCE' columns */ if (pl->pl_prop == (type == ZFS_TYPE_POOL ? ZPOOL_PROP_NAME : ZFS_PROP_NAME) && pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) { cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width; cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width + strlen(dgettext(TEXT_DOMAIN, "inherited from")); } } /* * Now go through and print the headers. */ for (i = 0; i < ZFS_GET_NCOLS; i++) { switch (cbp->cb_columns[i]) { case GET_COL_NAME: title = dgettext(TEXT_DOMAIN, "NAME"); break; case GET_COL_PROPERTY: title = dgettext(TEXT_DOMAIN, "PROPERTY"); break; case GET_COL_VALUE: title = dgettext(TEXT_DOMAIN, "VALUE"); break; case GET_COL_RECVD: title = dgettext(TEXT_DOMAIN, "RECEIVED"); break; case GET_COL_SOURCE: title = dgettext(TEXT_DOMAIN, "SOURCE"); break; default: title = NULL; } if (title != NULL) { if (i == (ZFS_GET_NCOLS - 1) || cbp->cb_columns[i + 1] == GET_COL_NONE) (void) printf("%s", title); else (void) printf("%-*s ", cbp->cb_colwidths[cbp->cb_columns[i]], title); } } (void) printf("\n"); } /* * Display a single line of output, according to the settings in the callback * structure. */ void zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, const char *propname, const char *value, zprop_source_t sourcetype, const char *source, const char *recvd_value) { int i; const char *str = NULL; char buf[128]; /* * Ignore those source types that the user has chosen to ignore. */ if ((sourcetype & cbp->cb_sources) == 0) return; if (cbp->cb_first) zprop_print_headers(cbp, cbp->cb_type); for (i = 0; i < ZFS_GET_NCOLS; i++) { switch (cbp->cb_columns[i]) { case GET_COL_NAME: str = name; break; case GET_COL_PROPERTY: str = propname; break; case GET_COL_VALUE: str = value; break; case GET_COL_SOURCE: switch (sourcetype) { case ZPROP_SRC_NONE: str = "-"; break; case ZPROP_SRC_DEFAULT: str = "default"; break; case ZPROP_SRC_LOCAL: str = "local"; break; case ZPROP_SRC_TEMPORARY: str = "temporary"; break; case ZPROP_SRC_INHERITED: (void) snprintf(buf, sizeof (buf), "inherited from %s", source); str = buf; break; case ZPROP_SRC_RECEIVED: str = "received"; break; } break; case GET_COL_RECVD: str = (recvd_value == NULL ? "-" : recvd_value); break; default: continue; } if (cbp->cb_columns[i + 1] == GET_COL_NONE) (void) printf("%s", str); else if (cbp->cb_scripted) (void) printf("%s\t", str); else (void) printf("%-*s ", cbp->cb_colwidths[cbp->cb_columns[i]], str); } (void) printf("\n"); } /* * Given a numeric suffix, convert the value into a number of bits that the * resulting value must be shifted. */ static int str2shift(libzfs_handle_t *hdl, const char *buf) { const char *ends = "BKMGTPEZ"; int i; if (buf[0] == '\0') return (0); for (i = 0; i < strlen(ends); i++) { if (toupper(buf[0]) == ends[i]) break; } if (i == strlen(ends)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid numeric suffix '%s'"), buf); return (-1); } /* * Allow 'G' = 'GB' = 'GiB', case-insensitively. * However, 'BB' and 'BiB' are disallowed. */ if (buf[1] == '\0' || (toupper(buf[0]) != 'B' && ((toupper(buf[1]) == 'B' && buf[2] == '\0') || (toupper(buf[1]) == 'I' && toupper(buf[2]) == 'B' && buf[3] == '\0')))) return (10*i); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid numeric suffix '%s'"), buf); return (-1); } /* * Convert a string of the form '100G' into a real number. Used when setting * properties or creating a volume. 'buf' is used to place an extended error * message for the caller to use. */ int zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num) { char *end; int shift; *num = 0; /* Check to see if this looks like a number. */ if ((value[0] < '0' || value[0] > '9') && value[0] != '.') { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "bad numeric value '%s'"), value); return (-1); } /* Rely on strtoull() to process the numeric portion. */ errno = 0; *num = strtoull(value, &end, 10); /* * Check for ERANGE, which indicates that the value is too large to fit * in a 64-bit value. */ if (errno == ERANGE) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } /* * If we have a decimal value, then do the computation with floating * point arithmetic. Otherwise, use standard arithmetic. */ if (*end == '.') { double fval = strtod(value, &end); if ((shift = str2shift(hdl, end)) == -1) return (-1); fval *= pow(2, shift); if (fval > UINT64_MAX) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } *num = (uint64_t)fval; } else { if ((shift = str2shift(hdl, end)) == -1) return (-1); /* Check for overflow */ if (shift >= 64 || (*num << shift) >> shift != *num) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } *num <<= shift; } return (0); } /* * Given a propname=value nvpair to set, parse any numeric properties * (index, boolean, etc) if they are specified as strings and add the * resulting nvpair to the returned nvlist. * * At the DSL layer, all properties are either 64-bit numbers or strings. * We want the user to be able to ignore this fact and specify properties * as native values (numbers, for example) or as strings (to simplify * command line utilities). This also handles converting index types * (compression, checksum, etc) from strings to their on-disk index. */ int zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop, zfs_type_t type, nvlist_t *ret, char **svalp, uint64_t *ivalp, const char *errbuf) { data_type_t datatype = nvpair_type(elem); zprop_type_t proptype; const char *propname; char *value; boolean_t isnone = B_FALSE; if (type == ZFS_TYPE_POOL) { proptype = zpool_prop_get_type(prop); propname = zpool_prop_to_name(prop); } else { proptype = zfs_prop_get_type(prop); propname = zfs_prop_to_name(prop); } /* * Convert any properties to the internal DSL value types. */ *svalp = NULL; *ivalp = 0; switch (proptype) { case PROP_TYPE_STRING: if (datatype != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), nvpair_name(elem)); goto error; } (void) nvpair_value_string(elem, svalp); if (strlen(*svalp) >= ZFS_MAXPROPLEN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is too long"), nvpair_name(elem)); goto error; } break; case PROP_TYPE_NUMBER: if (datatype == DATA_TYPE_STRING) { (void) nvpair_value_string(elem, &value); if (strcmp(value, "none") == 0) { isnone = B_TRUE; } else if (zfs_nicestrtonum(hdl, value, ivalp) != 0) { goto error; } } else if (datatype == DATA_TYPE_UINT64) { (void) nvpair_value_uint64(elem, ivalp); } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a number"), nvpair_name(elem)); goto error; } /* * Quota special: force 'none' and don't allow 0. */ if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone && (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "use 'none' to disable quota/refquota")); goto error; } break; case PROP_TYPE_INDEX: if (datatype != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), nvpair_name(elem)); goto error; } (void) nvpair_value_string(elem, &value); if (zprop_string_to_index(prop, value, ivalp, type) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be one of '%s'"), propname, zprop_values(prop, type)); goto error; } break; default: abort(); } /* * Add the result to our return set of properties. */ if (*svalp != NULL) { if (nvlist_add_string(ret, propname, *svalp) != 0) { (void) no_memory(hdl); return (-1); } } else { if (nvlist_add_uint64(ret, propname, *ivalp) != 0) { (void) no_memory(hdl); return (-1); } } return (0); error: (void) zfs_error(hdl, EZFS_BADPROP, errbuf); return (-1); } static int addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp, zfs_type_t type) { int prop; zprop_list_t *entry; prop = zprop_name_to_prop(propname, type); if (prop != ZPROP_INVAL && !zprop_valid_for_type(prop, type)) prop = ZPROP_INVAL; /* * When no property table entry can be found, return failure if * this is a pool property or if this isn't a user-defined * dataset property, */ if (prop == ZPROP_INVAL && (type == ZFS_TYPE_POOL || - (!zfs_prop_user(propname) && !zfs_prop_userquota(propname)))) { + (!zfs_prop_user(propname) && !zfs_prop_userquota(propname) && + !zfs_prop_written(propname)))) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "bad property list"))); } if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL) return (-1); entry->pl_prop = prop; if (prop == ZPROP_INVAL) { if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) == NULL) { free(entry); return (-1); } entry->pl_width = strlen(propname); } else { entry->pl_width = zprop_width(prop, &entry->pl_fixed, type); } *listp = entry; return (0); } /* * Given a comma-separated list of properties, construct a property list * containing both user-defined and native properties. This function will * return a NULL list if 'all' is specified, which can later be expanded * by zprop_expand_list(). */ int zprop_get_list(libzfs_handle_t *hdl, char *props, zprop_list_t **listp, zfs_type_t type) { *listp = NULL; /* * If 'all' is specified, return a NULL list. */ if (strcmp(props, "all") == 0) return (0); /* * If no props were specified, return an error. */ if (props[0] == '\0') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no properties specified")); return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "bad property list"))); } /* * It would be nice to use getsubopt() here, but the inclusion of column * aliases makes this more effort than it's worth. */ while (*props != '\0') { size_t len; char *p; char c; if ((p = strchr(props, ',')) == NULL) { len = strlen(props); p = props + len; } else { len = p - props; } /* * Check for empty options. */ if (len == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "empty property name")); return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "bad property list"))); } /* * Check all regular property names. */ c = props[len]; props[len] = '\0'; if (strcmp(props, "space") == 0) { static char *spaceprops[] = { "name", "avail", "used", "usedbysnapshots", "usedbydataset", "usedbyrefreservation", "usedbychildren", NULL }; int i; for (i = 0; spaceprops[i]; i++) { if (addlist(hdl, spaceprops[i], listp, type)) return (-1); listp = &(*listp)->pl_next; } } else { if (addlist(hdl, props, listp, type)) return (-1); listp = &(*listp)->pl_next; } props = p; if (c == ',') props++; } return (0); } void zprop_free_list(zprop_list_t *pl) { zprop_list_t *next; while (pl != NULL) { next = pl->pl_next; free(pl->pl_user_prop); free(pl); pl = next; } } typedef struct expand_data { zprop_list_t **last; libzfs_handle_t *hdl; zfs_type_t type; } expand_data_t; int zprop_expand_list_cb(int prop, void *cb) { zprop_list_t *entry; expand_data_t *edp = cb; if ((entry = zfs_alloc(edp->hdl, sizeof (zprop_list_t))) == NULL) return (ZPROP_INVAL); entry->pl_prop = prop; entry->pl_width = zprop_width(prop, &entry->pl_fixed, edp->type); entry->pl_all = B_TRUE; *(edp->last) = entry; edp->last = &entry->pl_next; return (ZPROP_CONT); } int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type) { zprop_list_t *entry; zprop_list_t **last; expand_data_t exp; if (*plp == NULL) { /* * If this is the very first time we've been called for an 'all' * specification, expand the list to include all native * properties. */ last = plp; exp.last = last; exp.hdl = hdl; exp.type = type; if (zprop_iter_common(zprop_expand_list_cb, &exp, B_FALSE, B_FALSE, type) == ZPROP_INVAL) return (-1); /* * Add 'name' to the beginning of the list, which is handled * specially. */ if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL) return (-1); entry->pl_prop = (type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME; entry->pl_width = zprop_width(entry->pl_prop, &entry->pl_fixed, type); entry->pl_all = B_TRUE; entry->pl_next = *plp; *plp = entry; } return (0); } int zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, zfs_type_t type) { return (zprop_iter_common(func, cb, show_all, ordered, type)); } diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 3fd91a99dd2b..62ca9da2b434 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -1,3056 +1,3260 @@ '\" te .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright (c) 2011 by Delphix. All rights reserved. .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with .\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with -.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH zfs 8 "8 Apr 2011" "ZFS pool 28, filesystem 5" "System Administration Commands" +.TH zfs 8 "10 Jul 2012" "ZFS pool 28, filesystem 5" "System Administration Commands" .SH NAME zfs \- configures ZFS file systems .SH SYNOPSIS .LP .nf \fBzfs\fR [\fB-?\fR] .fi .LP .nf \fBzfs\fR \fBcreate\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBcreate\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fB-V\fR \fIsize\fR \fIvolume\fR .fi .LP .nf -\fBzfs\fR \fBdestroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR +\fBzfs\fR \fBdestroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf -\fBzfs\fR \fBdestroy\fR [\fB-rRd\fR] \fIsnapshot\fR +\fBzfs\fR \fBdestroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,...] .fi .LP .nf \fBzfs\fR \fBsnapshot | snap\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR .fi .LP .nf \fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBclone\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBpromote\fR \fIclone-filesystem\fR .fi .LP .nf \fBzfs\fR \fBrename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBrename\fR [\fB-p\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBrename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-H\fR][\fB-o\fR \fIproperty\fR[,...]] [\fB-t\fR \fItype\fR[,...]] [\fB-s\fR \fIproperty\fR] ... [\fB-S\fR \fIproperty\fR] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fIsnap\fR] ... .fi .LP .nf \fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ... .fi .LP .nf \fBzfs\fR \fBget\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIsource\fR[,...]] "\fIall\fR" | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ... .fi .LP .nf \fBzfs\fR \fBinherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume|snapshot\fR ... .fi .LP .nf \fBzfs\fR \fBupgrade\fR [\fB-v\fR] .fi .LP .nf \fBzfs\fR \fBupgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBuserspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR] ... - [\fB-t\fR \fItype\fR [,...]] \fIfilesystem\fR|\fIsnapshot\fR + [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBgroupspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR] ... - [\fB-t\fR \fItype\fR [,...]] \fIfilesystem\fR|\fIsnapshot\fR + [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBmount\fR .fi .LP .nf \fBzfs\fR \fBmount\fR [\fB-vO\fR] [\fB-o \fIoptions\fR\fR] \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBunmount | umount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR .fi .LP .nf \fBzfs\fR \fBshare\fR \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBunshare\fR \fB-a\fR \fIfilesystem\fR|\fImountpoint\fR .fi .LP .nf -\fBzfs\fR \fBsend\fR [\fB-vRDp\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR +\fBzfs\fR \fBsend\fR [\fB-DnPpRrv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBreceive | recv\fR [\fB-vnFu\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBreceive | recv\fR [\fB-vnFu\fR] \fB-d\fR \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR [\fB-ldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] \fIperm\fR|\fI@setname\fR[,...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-rldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-rld\fR] \fB-e\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[ ... ]] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBhold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBholds\fR [\fB-r\fR] \fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBrelease\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... .fi .SH DESCRIPTION .sp .LP The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(8). A dataset is identified by a unique path within the \fBZFS\fR namespace. For example: .sp .in +2 .nf pool/{filesystem,volume,snapshot} .fi .in -2 .sp .sp .LP where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes). .sp .LP A dataset can be one of the following: .sp .ne 2 .mk .na \fB\fIfile system\fR\fR .ad .sp .6 .RS 4n A \fBZFS\fR dataset of type \fBfilesystem\fR can be mounted within the standard system namespace and behaves like other file systems. While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space. .RE .sp .ne 2 .mk .na \fB\fIvolume\fR\fR .ad .sp .6 .RS 4n A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. .RE .sp .ne 2 .mk .na \fB\fIsnapshot\fR\fR .ad .sp .6 .RS 4n A read-only version of a file system or volume at a given point in time. It is specified as \fIfilesystem@name\fR or \fIvolume@name\fR. .RE .SS "ZFS File System Hierarchy" .sp .LP A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system hierarchy. .sp .LP The root of the pool can be accessed as a file system, such as mounting and unmounting, taking snapshots, and setting properties. The physical storage characteristics, however, are managed by the \fBzpool\fR(8) command. .sp .LP See \fBzpool\fR(8) for more information on creating and administering pools. .SS "Snapshots" .sp .LP A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more data than would otherwise be shared with the active dataset. .sp .LP Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently. .sp .LP File system snapshots can be accessed under the \fB\&.zfs/snapshot\fR directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the \fB\&.zfs\fR directory can be controlled by the \fBsnapdir\fR property. .SS "Clones" .sp .LP A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space. .sp .LP Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The \fBorigin\fR property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist. .sp .LP The clone parent-child dependency relationship can be reversed by using the \fBpromote\fR subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from. .SS "Mount Points" .sp .LP Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system is likely to be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/fstab\fR file. All automatically managed file systems are mounted by \fBZFS\fR at boot time. .sp .LP By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed. .sp .LP A file system can also have a mount point set in the \fBmountpoint\fR property. This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the \fBzfs mount -a\fR command is invoked (without editing \fB/etc/fstab\fR). The \fBmountpoint\fR property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR. .sp .LP A file system \fBmountpoint\fR property of \fBnone\fR prevents the file system from being mounted. .sp .LP If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/fstab\fR). If a file system's mount point is set to \fBlegacy\fR, \fBZFS\fR makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system. .SS "Native Properties" .sp .LP Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below. .sp .LP Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots). .sp .LP The values of numeric properties can be specified using human-readable suffixes (for example, \fBk\fR, \fBKB\fR, \fBM\fR, \fBGb\fR, and so forth, up to \fBZ\fR for zettabyte). The following are all valid (and equal) specifications: .sp .in +2 .nf 1536M, 1.5g, 1.50GB .fi .in -2 .sp .sp .LP The values of non-numeric properties are case sensitive and must be lowercase, except for \fBmountpoint\fR, \fBsharenfs\fR, and \fBsharesmb\fR. .sp .LP The following native properties consist of read-only statistics about the dataset. These properties can be neither set, nor inherited. Native properties apply to all dataset types unless otherwise noted. .sp .ne 2 .mk .na \fB\fBavailable\fR\fR .ad .sp .6 .RS 4n The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets within the pool. .sp This property can also be referred to by its shortened column name, \fBavail\fR. .RE .sp .ne 2 .mk .na \fB\fBcompressratio\fR\fR .ad .sp .6 .RS 4n For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. .RE .sp .ne 2 .mk .na \fB\fBcreation\fR\fR .ad .sp .6 .RS 4n The time this dataset was created. .RE .sp .ne 2 .mk .na +\fB\fBclones\fR\fR +.ad +.sp .6 +.RS 4n +For snapshots, this property is a comma-separated list of filesystems or +volumes which are clones of this snapshot. The clones' \fBorigin\fR property +is this snapshot. If the \fBclones\fR property is not empty, then this +snapshot can not be destroyed (even with the \fB-r\fR or \fB-f\fR options). +.RE + +.sp +.ne 2 +.na \fB\fBdefer_destroy\fR\fR .ad .sp .6 .RS 4n This property is \fBon\fR if the snapshot has been marked for deferred destruction by using the \fBzfs destroy\fR \fB-d\fR command. Otherwise, the property is \fBoff\fR. .RE .sp .ne 2 .mk .na \fB\fBmounted\fR\fR .ad .sp .6 .RS 4n For file systems, indicates whether the file system is currently mounted. This property can be either \fByes\fR or \fBno\fR. .RE .sp .ne 2 .mk .na \fB\fBorigin\fR\fR .ad .sp .6 .RS 4n -For cloned file systems or volumes, the snapshot from which the clone was created. The origin cannot be destroyed (even with the \fB-r\fR or \fB-f\fR options) so long as a clone exists. +For cloned file systems or volumes, the snapshot from which the clone was created. See also the \fBclones\fR property. .RE .sp .ne 2 .mk .na \fB\fBreferenced\fR\fR .ad .sp .6 .RS 4n The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are identical. .sp This property can also be referred to by its shortened column name, \fBrefer\fR. .RE .sp .ne 2 .mk .na \fB\fBrefcompressratio\fR\fR .ad .sp .6 .RS 4n The compression ratio achieved for the \fBreferenced\fR space of this dataset, expressed as a multiplier. See also the \fBcompressratio\fR property. .RE .sp .ne 2 .mk .na \fB\fBtype\fR\fR .ad .sp .6 .RS 4n The type of dataset: \fBfilesystem\fR, \fBvolume\fR, or \fBsnapshot\fR. .RE .sp .ne 2 .mk .na \fB\fBused\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by this dataset and all its descendents. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendent datasets. The amount of space that a dataset consumes from its parent, as well as the amount of space that are freed if this dataset is recursively destroyed, is the greater of its space used and its reservation. .sp When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots. .sp The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(2) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately. .RE .sp .ne 2 .mk .na \fB\fBusedby*\fR\fR .ad .sp .6 .RS 4n The \fBusedby*\fR properties decompose the \fBused\fR properties into the various reasons that space is used. Specifically, \fBused\fR = \fBusedbychildren\fR + \fBusedbydataset\fR + \fBusedbyrefreservation\fR +, \fBusedbysnapshots\fR. These properties are only available for datasets created on \fBzpool\fR "version 13" pools. .RE .sp .ne 2 .mk .na \fB\fBusedbychildren\fR\fR .ad .sp .6 .RS 4n The amount of space used by children of this dataset, which would be freed if all the dataset's children were destroyed. .RE .sp .ne 2 .mk .na \fB\fBusedbydataset\fR\fR .ad .sp .6 .RS 4n The amount of space used by this dataset itself, which would be freed if the dataset were destroyed (after first removing any \fBrefreservation\fR and destroying any necessary snapshots or descendents). .RE .sp .ne 2 .mk .na \fB\fBusedbyrefreservation\fR\fR .ad .sp .6 .RS 4n The amount of space used by a \fBrefreservation\fR set on this dataset, which would be freed if the \fBrefreservation\fR was removed. .RE .sp .ne 2 .mk .na \fB\fBusedbysnapshots\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots. .RE .sp .ne 2 .mk .na \fB\fBuserused@\fR\fIuser\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information. .sp Unprivileged users can access only their own space usage. The root user, or a user who has been granted the \fBuserused\fR privilege with \fBzfs allow\fR, can access everyone's usage. .sp The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: .RS +4 .TP .ie t \(bu .el o \fIPOSIX name\fR (for example, \fBjoe\fR) .RE .RS +4 .TP .ie t \(bu .el o \fIPOSIX numeric ID\fR (for example, \fB789\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID name\fR (for example, \fBjoe.smith@mydomain\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) .RE .RE .sp .ne 2 .mk .na \fB\fBuserrefs\fR\fR .ad .sp .6 .RS 4n This property is set to the number of user holds on this snapshot. User holds are set by using the \fBzfs hold\fR command. .RE .sp .ne 2 .mk .na \fB\fBgroupused@\fR\fIgroup\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information. .sp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage. .RE .sp .ne 2 .mk .na \fB\fBvolblocksize\fR=\fIblocksize\fR\fR .ad .sp .6 .RS 4n For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes to 128 Kbytes is valid. .sp This property can also be referred to by its shortened column name, \fBvolblock\fR. .RE +.sp +.ne 2 +.na +\fB\fBwritten\fR\fR +.ad +.sp .6 +.RS 4n +The amount of \fBreferenced\fR space written to this dataset since the +previous snapshot. +.RE + +.sp +.ne 2 +.na +\fB\fBwritten@\fR\fIsnapshot\fR\fR +.ad +.sp .6 +.RS 4n +The amount of \fBreferenced\fR space written to this dataset since the +specified snapshot. This is the space that is referenced by this dataset +but was not referenced by the specified snapshot. +.sp +The \fIsnapshot\fR may be specified as a short snapshot name (just the part +after the \fB@\fR), in which case it will be interpreted as a snapshot in +the same filesystem as this dataset. +The \fIsnapshot\fR be a full snapshot name (\fIfilesystem\fR@\fIsnapshot\fR), +which for clones may be a snapshot in the origin's filesystem (or the origin +of the origin's filesystem, etc). +.RE + .sp .LP The following native properties can be used to change the behavior of a \fBZFS\fR dataset. .sp .ne 2 .mk .na \fB\fBaclinherit\fR=\fBdiscard\fR | \fBnoallow\fR | \fBrestricted\fR | \fBpassthrough\fR | \fBpassthrough-x\fR\fR .ad .sp .6 .RS 4n Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an \fBaclinherit\fR property of \fBdiscard\fR does not inherit any \fBACL\fR entries. A file system with an \fBaclinherit\fR property value of \fBnoallow\fR only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value \fBrestricted\fR (the default) removes the \fBwrite_acl\fR and \fBwrite_owner\fR permissions when the \fBACL\fR entry is inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough\fR inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough-x\fR has the same meaning as \fBpassthrough\fR, except that the \fBowner@\fR, \fBgroup@\fR, and \fBeveryone@\fR \fBACE\fRs inherit the execute permission only if the file creation mode also requests the execute bit. .sp When the property value is set to \fBpassthrough\fR, files are created with a mode determined by the inheritable \fBACE\fRs. If no inheritable \fBACE\fRs exist that affect the mode, then the mode is set in accordance to the requested mode from the application. .RE .sp .ne 2 .mk .na \fB\fBaclmode\fR=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR\fR .ad .sp .6 .RS 4n Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an \fBaclmode\fR property of \fBdiscard\fR deletes all \fBACL\fR entries that do not represent the mode of the file. An \fBaclmode\fR property of \fBgroupmask\fR (the default) reduces user or group permissions. The permissions are reduced, such that they are no greater than the group permission bits, unless it is a user entry that has the same \fBUID\fR as the owner of the file or directory. In this case, the \fBACL\fR permissions are reduced so that they are no greater than owner permission bits. A file system with an \fBaclmode\fR property of \fBpassthrough\fR indicates that no changes are made to the \fBACL\fR other than generating the necessary \fBACL\fR entries to represent the new mode of the file or directory. .RE .sp .ne 2 .mk .na \fB\fBatime\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBcanmount\fR=\fBon\fR | \fBoff\fR | \fBnoauto\fR\fR .ad .sp .6 .RS 4n If this property is set to \fBoff\fR, the file system cannot be mounted, and is ignored by \fBzfs mount -a\fR. Setting this property to \fBoff\fR is similar to setting the \fBmountpoint\fR property to \fBnone\fR, except that the dataset still has a normal \fBmountpoint\fR property, which can be inherited. Setting this property to \fBoff\fR allows datasets to be used solely as a mechanism to inherit properties. One example of setting \fBcanmount=\fR\fBoff\fR is to have two datasets with the same \fBmountpoint\fR, so that the children of both datasets appear in the same directory, but might have different inherited characteristics. .sp When the \fBnoauto\fR option is set, a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the \fBzfs mount -a\fR command or unmounted by the \fBzfs unmount -a\fR command. .sp This property is not inherited. .RE .sp .ne 2 .mk .na \fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2,\fR| \fBfletcher4\fR | \fBsha256\fR\fR .ad .sp .6 .RS 4n Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher2\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice. .sp Changing this property affects only newly-written data. .RE .sp .ne 2 .mk .na \fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBgzip\fR | \fBgzip-\fR\fIN\fR | \fBzle\fR .ad .sp .6 .RS 4n Controls the compression algorithm used for this dataset. The \fBlzjb\fR compression algorithm is optimized for performance while providing decent data compression. Setting compression to \fBon\fR uses the \fBlzjb\fR compression algorithm. .sp The \fBgzip\fR compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR (which is also the default for \fBgzip\fR(1)). .sp The \fBzle\fR (zero-length encoding) compression algorithm is a fast and simple algorithm to eliminate runs of zeroes. .sp This property can also be referred to by its shortened column name \fBcompress\fR. Changing this property affects only newly-written data. .RE .sp .ne 2 .mk .na \fB\fBcopies\fR=\fB1\fR | \fB2\fR | \fB3\fR\fR .ad .sp .6 .RS 4n Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations. .sp Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR\fIN\fR option. .RE .sp .ne 2 .mk .na \fB\fBdevices\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether device nodes can be opened on this file system. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBexec\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether processes can be executed from within this file system. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBmountpoint\fR=\fIpath\fR | \fBnone\fR | \fBlegacy\fR\fR .ad .sp .6 .RS 4n Controls the mount point used for this file system. See the "Mount Points" section for more information on how this property is used. .sp When the \fBmountpoint\fR property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is \fBlegacy\fR, then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was previously \fBlegacy\fR or \fBnone\fR, or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location. .RE .sp .ne 2 .mk .na \fB\fBnbmand\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the file system should be mounted with \fBnbmand\fR (Non Blocking mandatory locks). This is used for \fBCIFS\fR clients. Changes to this property only take effect when the file system is umounted and remounted. See \fBmount\fR(8) for more information on \fBnbmand\fR mounts. .RE .sp .ne 2 .mk .na \fB\fBprimarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR .ad .sp .6 .RS 4n Controls what is cached in the primary cache (ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. .RE .sp .ne 2 .mk .na \fB\fBquota\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already has a quota does not override the ancestor's quota, but rather imposes an additional limit. .sp Quotas cannot be set on volumes, as the \fBvolsize\fR property acts as an implicit quota. .RE .sp .ne 2 .mk .na \fB\fBuserquota@\fR\fIuser\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space consumed by the specified user. User space consumption is identified by the \fBuserspace@\fR\fIuser\fR property. .sp Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the \fBEDQUOT\fR error message . See the \fBzfs userspace\fR subcommand for more information. .sp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota. .sp This property is not available on volumes, on file systems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: .RS +4 .TP .ie t \(bu .el o \fIPOSIX name\fR (for example, \fBjoe\fR) .RE .RS +4 .TP .ie t \(bu .el o \fIPOSIX numeric ID\fR (for example, \fB789\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID name\fR (for example, \fBjoe.smith@mydomain\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) .RE .RE .sp .ne 2 .mk .na \fB\fBgroupquota@\fR\fIgroup\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space consumed by the specified group. Group space consumption is identified by the \fBuserquota@\fR\fIuser\fR property. .sp Unprivileged users can access only their own groups' space usage. The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas. .RE .sp .ne 2 .mk .na \fB\fBreadonly\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether this dataset can be modified. The default value is \fBoff\fR. .sp This property can also be referred to by its shortened column name, \fBrdonly\fR. .RE .sp .ne 2 .mk .na \fB\fBrecordsize\fR=\fIsize\fR\fR .ad .sp .6 .RS 4n Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical access patterns. .sp For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a \fBrecordsize\fR greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance. .sp The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes. .sp Changing the file system's \fBrecordsize\fR affects only files created afterward; existing files are unaffected. .sp This property can also be referred to by its shortened column name, \fBrecsize\fR. .RE .sp .ne 2 .mk .na \fB\fBrefquota\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space a dataset can consume. This property enforces a hard limit on the amount of space used. This hard limit does not include space used by descendents, including file systems and snapshots. .RE .sp .ne 2 .mk .na \fB\fBrefreservation\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n The minimum amount of space guaranteed to a dataset, not including its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by \fBrefreservation\fR. The \fBrefreservation\fR reservation is accounted for in the parent datasets' space used, and counts against the parent datasets' quotas and reservations. .sp If \fBrefreservation\fR is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of "referenced" bytes in the dataset. .sp This property can also be referred to by its shortened column name, \fBrefreserv\fR. .RE .sp .ne 2 .mk .na \fB\fBreservation\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n The minimum amount of space guaranteed to a dataset and its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space used, and count against the parent datasets' quotas and reservations. .sp This property can also be referred to by its shortened column name, \fBreserv\fR. .RE .sp .ne 2 .mk .na \fB\fBsecondarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR .ad .sp .6 .RS 4n Controls what is cached in the secondary cache (L2ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. .RE .sp .ne 2 .mk .na \fB\fBsetuid\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBshareiscsi\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Like the \fBsharenfs\fR property, \fBshareiscsi\fR indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. The acceptable values for this property are \fBon\fR, \fBoff\fR, and \fBtype=disk\fR. The default value is \fBoff\fR. In the future, other target types might be supported. For example, \fBtape\fR. .sp You might want to set \fBshareiscsi=on\fR for a file system so that all \fBZFS\fR volumes within the file system are shared by default. However, setting this property on a file system has no direct effect. .RE .sp .ne 2 .mk .na \fB\fBsharesmb\fR=\fBon\fR | \fBoff\fR | \fIopts\fR\fR .ad .sp .6 .RS 4n Controls whether the file system is shared by using the Solaris \fBCIFS\fR service, and what options are to be used. A file system with the \fBsharesmb\fR property set to \fBoff\fR is managed through traditional tools such as \fBsharemgr\fR(1M). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBsharemgr\fR(1M) command is invoked with no options. Otherwise, the \fBsharemgr\fR(1M) command is invoked with options equivalent to the contents of this property. .sp Because \fBSMB\fR shares requires a resource name, a unique resource name is constructed from the dataset name. The constructed name is a copy of the dataset name except that the characters in the dataset name, which would be illegal in the resource name, are replaced with underscore (\fB_\fR) characters. A pseudo property "name" is also supported that allows you to replace the data set name with a specified name. The specified name is then used to replace the prefix dataset in the case of inheritance. For example, if the dataset \fBdata/home/john\fR is set to \fBname=john\fR, then \fBdata/home/john\fR has a resource name of \fBjohn\fR. If a child dataset of \fBdata/home/john/backups\fR, it has a resource name of \fBjohn_backups\fR. .sp When SMB shares are created, the SMB share name appears as an entry in the \fB\&.zfs/shares\fR directory. You can use the \fBls\fR or \fBchmod\fR command to display the share-level ACLs on the entries in this directory. .sp When the \fBsharesmb\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously set to \fBoff\fR, or if they were shared before the property was changed. If the new property is set to \fBoff\fR, the file systems are unshared. .RE .sp .ne 2 .mk .na \fB\fBsharenfs\fR=\fBon\fR | \fBoff\fR | \fIopts\fR\fR .ad .sp .6 .RS 4n Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a \fBsharenfs\fR property of \fBoff\fR is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property. .sp When the \fBsharenfs\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously \fBoff\fR, or if they were shared before the property was changed. If the new property is \fBoff\fR, the file systems are unshared. .RE .sp .ne 2 .mk .na \fB\fBlogbias\fR = \fBlatency\fR | \fBthroughput\fR\fR .ad .sp .6 .RS 4n Provide a hint to ZFS about handling of synchronous requests in this dataset. If \fBlogbias\fR is set to \fBlatency\fR (the default), ZFS will use pool log devices (if configured) to handle the requests at low latency. If \fBlogbias\fR is set to \fBthroughput\fR, ZFS will not use configured pool log devices. ZFS will instead optimize synchronous operations for global pool throughput and efficient use of resources. .RE .sp .ne 2 .mk .na \fB\fBsnapdir\fR=\fBhidden\fR | \fBvisible\fR\fR .ad .sp .6 .RS 4n Controls whether the \fB\&.zfs\fR directory is hidden or visible in the root of the file system as discussed in the "Snapshots" section. The default value is \fBhidden\fR. .RE .sp .ne 2 .mk .na +\fB\fBsync\fR=\fBdefault\fR | \fBalways\fR | \fBdisabled\fR\fR +.ad +.sp .6 +.RS 4n +Controls the behavior of synchronous requests (e.g. fsync, O_DSYNC). +\fBdefault\fR is the POSIX specified behavior of ensuring all synchronous +requests are written to stable storage and all devices are flushed to ensure +data is not cached by device controllers (this is the default). \fBalways\fR +causes every file system transaction to be written and flushed before its +system call returns. This has a large performance penalty. \fBdisabled\fR +disables synchronous requests. File system transactions are only committed to +stable storage periodically. This option will give the highest performance. +However, it is very dangerous as ZFS would be ignoring the synchronous +transaction demands of applications such as databases or NFS. Administrators +should only use this option when the risks are understood. +.RE + +.sp +.ne 2 +.na \fB\fBversion\fR=\fB1\fR | \fB2\fR | \fBcurrent\fR\fR .ad .sp .6 .RS 4n The on-disk version of this file system, which is independent of the pool version. This property can only be set to later supported versions. See the \fBzfs upgrade\fR command. .RE .sp .ne 2 .mk .na \fB\fBvolsize\fR=\fIsize\fR\fR .ad .sp .6 .RS 4n For volumes, specifies the logical size of the volume. By default, creating a volume establishes a reservation of equal size. For storage pools with a version number of 9 or higher, a \fBrefreservation\fR is set instead. Any changes to \fBvolsize\fR are reflected in an equivalent change to the reservation (or \fBrefreservation\fR). The \fBvolsize\fR can only be set to a multiple of \fBvolblocksize\fR, and cannot be zero. .sp The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when the volume size is changed while it is in use (particularly when shrinking the size). Extreme care should be used when adjusting the volume size. .sp Though not recommended, a "sparse volume" (also known as "thin provisioning") can be created by specifying the \fB-s\fR option to the \fBzfs create -V\fR command, or by changing the reservation after the volume has been created. A "sparse volume" is a volume where the reservation is less then the volume size. Consequently, writes to a sparse volume can fail with \fBENOSPC\fR when the pool is low on space. For a sparse volume, changes to \fBvolsize\fR are not reflected in the reservation. .RE .sp .ne 2 .mk .na \fB\fBvscan\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether regular files should be scanned for viruses when a file is opened and closed. In addition to enabling this property, the virus scan service must also be enabled for virus scanning to occur. The default value is \fBoff\fR. .RE .sp .ne 2 .mk .na \fB\fBxattr\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether extended attributes are enabled for this file system. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBzoned\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the dataset is managed from a non-global zone. Zones are a Solaris feature and are not relevant on Linux. The default value is \fBoff\fR. .RE .sp .LP The following three properties cannot be changed after the file system is created, and therefore, should be set when the file system is created. If the properties are not set with the \fBzfs create\fR or \fBzpool create\fR commands, these properties are inherited from the parent dataset. If the parent dataset lacks these properties due to having been created prior to these features being supported, the new file system will have the default values for these properties. .sp .ne 2 .mk .na \fB\fBcasesensitivity\fR=\fBsensitive\fR | \fBinsensitive\fR | \fBmixed\fR\fR .ad .sp .6 .RS 4n Indicates whether the file name matching algorithm used by the file system should be case-sensitive, case-insensitive, or allow a combination of both styles of matching. The default value for the \fBcasesensitivity\fR property is \fBsensitive\fR. Traditionally, UNIX and POSIX file systems have case-sensitive file names. .sp The \fBmixed\fR value for the \fBcasesensitivity\fR property indicates that the file system can support requests for both case-sensitive and case-insensitive matching behavior. Currently, case-insensitive matching behavior on a file system that supports mixed behavior is limited to the Solaris CIFS server product. For more information about the \fBmixed\fR value behavior, see the \fISolaris ZFS Administration Guide\fR. .RE .sp .ne 2 .mk .na \fB\fBnormalization\fR = \fBnone\fR | \fBformC\fR | \fBformD\fR | \fBformKC\fR | \fBformKD\fR\fR .ad .sp .6 .RS 4n Indicates whether the file system should perform a \fBunicode\fR normalization of file names whenever two file names are compared, and which normalization algorithm should be used. File names are always stored unmodified, names are normalized as part of any comparison process. If this property is set to a legal value other than \fBnone\fR, and the \fButf8only\fR property was left unspecified, the \fButf8only\fR property is automatically set to \fBon\fR. The default value of the \fBnormalization\fR property is \fBnone\fR. This property cannot be changed after the file system is created. .RE .sp .ne 2 .mk .na \fB\fButf8only\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Indicates whether the file system should reject file names that include characters that are not present in the \fBUTF-8\fR character code set. If this property is explicitly set to \fBoff\fR, the normalization property must either not be explicitly set or be set to \fBnone\fR. The default value for the \fButf8only\fR property is \fBoff\fR. This property cannot be changed after the file system is created. .RE .sp .LP The \fBcasesensitivity\fR, \fBnormalization\fR, and \fButf8only\fR properties are also new permissions that can be assigned to non-privileged users by using the \fBZFS\fR delegated administration feature. .SS "Temporary Mount Point Properties" .sp .LP When a file system is mounted, either through \fBmount\fR(8) for legacy mounts or the \fBzfs mount\fR command for normal file systems, its mount options are set according to its properties. The correlation between properties and mount options is as follows: .sp .in +2 .nf PROPERTY MOUNT OPTION devices devices/nodevices exec exec/noexec readonly ro/rw setuid setuid/nosetuid xattr xattr/noxattr .fi .in -2 .sp .sp .LP In addition, these options can be set on a per-mount basis using the \fB-o\fR option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. The \fB-nosuid\fR option is an alias for \fBnodevices,nosetuid\fR. These properties are reported as "temporary" by the \fBzfs get\fR command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings. .SS "User Properties" .sp .LP In addition to the standard native properties, \fBZFS\fR supports arbitrary user properties. User properties have no effect on \fBZFS\fR behavior, but applications or administrators can use them to annotate datasets (file systems, volumes, and snapshots). .sp .LP User property names must contain a colon (\fB:\fR) character to distinguish them from native properties. They may contain lowercase letters, numbers, and the following punctuation characters: colon (\fB:\fR), dash (\fB-\fR), period (\fB\&.\fR), and underscore (\fB_\fR). The expected convention is that the property name is divided into two portions such as \fImodule\fR\fB:\fR\fIproperty\fR, but this namespace is not enforced by \fBZFS\fR. User property names can be at most 256 characters, and cannot begin with a dash (\fB-\fR). .sp .LP When making programmatic use of user properties, it is strongly suggested to use a reversed \fBDNS\fR domain name for the \fImodule\fR component of property names to reduce the chance that two independently-developed packages use the same property name for different purposes. For example, property names beginning with \fBcom.sun\fR. are reserved for use by Oracle Corporation (which acquired Sun Microsystems). .sp .LP The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties (\fBzfs list\fR, \fBzfs get\fR, \fBzfs set\fR, and so forth) can be used to manipulate both native properties and user properties. Use the \fBzfs inherit\fR command to clear a user property . If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters. .SS "ZFS Volumes as Swap" .sp .LP Do not swap to a file on a \fBZFS\fR file system. A \fBZFS\fR swap file configuration is not supported. .SH SUBCOMMANDS .sp .LP All subcommands that modify state are logged persistently to the pool in their original form. .sp .ne 2 .mk .na \fB\fBzfs ?\fR\fR .ad .sp .6 .RS 4n Displays a help message. .RE .sp .ne 2 .mk .na \fB\fBzfs create\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem\fR\fR .ad .sp .6 .RS 4n Creates a new \fBZFS\fR file system. The file system is automatically mounted according to the \fBmountpoint\fR property inherited from the parent. .sp .ne 2 .mk .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If the target filesystem already exists, the operation completes successfully. .RE .sp .ne 2 .mk .na \fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR .ad .sp .6 .RS 4n Sets the specified property as if the command \fBzfs set\fR \fIproperty\fR=\fIvalue\fR was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs create\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fB-V\fR \fIsize\fR \fIvolume\fR\fR .ad .sp .6 .RS 4n Creates a volume of the given size. The volume is exported as a block device in \fB/dev/zvol/\fR\fIpath\fR, where \fIpath\fR is the name of the volume in the \fBZFS\fR namespace. The size represents the logical size as exported by the device. By default, a reservation of equal size is created. .sp \fIsize\fR is automatically rounded up to the nearest 128 Kbytes to ensure that the volume has an integral number of blocks regardless of \fIblocksize\fR. .sp .ne 2 .mk .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If the target filesystem already exists, the operation completes successfully. .RE .sp .ne 2 .mk .na \fB\fB-s\fR\fR .ad .sp .6 .RS 4n Creates a sparse volume with no reservation. See \fBvolsize\fR in the Native Properties section for more information about sparse volumes. .RE .sp .ne 2 .mk .na \fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR .ad .sp .6 .RS 4n Sets the specified property as if the \fBzfs set\fR \fIproperty\fR=\fIvalue\fR command was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options. .RE .sp .ne 2 .mk .na \fB\fB-b\fR \fIblocksize\fR\fR .ad .sp .6 .RS 4n Equivalent to \fB-o\fR \fBvolblocksize\fR=\fIblocksize\fR. If this option is specified in conjunction with \fB-o\fR \fBvolblocksize\fR, the resulting behavior is undefined. .RE .RE .sp .ne 2 .mk .na -\fB\fBzfs destroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR\fR +\fBzfs destroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR .ad .sp .6 .RS 4n Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children or clones). .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively destroy all children. .RE .sp .ne 2 .mk .na \fB\fB-R\fR\fR .ad .sp .6 .RS 4n Recursively destroy all dependents, including cloned file systems outside the target hierarchy. .RE .sp .ne 2 .mk .na \fB\fB-f\fR\fR .ad .sp .6 .RS 4n Force an unmount of any file systems using the \fBunmount -f\fR command. This option has no effect on non-file systems or unmounted file systems. .RE +.sp +.ne 2 +.na +\fB\fB-n\fR\fR +.ad +.sp .6 +.RS 4n +Do a dry-run ("No-op") deletion. No data will be deleted. This is +useful in conjunction with the \fB-v\fR or \fB-p\fR flags to determine what +data would be deleted. +.RE + +.sp +.ne 2 +.na +\fB\fB-p\fR\fR +.ad +.sp .6 +.RS 4n +Print machine-parsable verbose information about the deleted data. +.RE + +.sp +.ne 2 +.na +\fB\fB-v\fR\fR +.ad +.sp .6 +.RS 4n +Print verbose information about the deleted data. +.RE +.sp + Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. .RE .sp .ne 2 .mk .na -\fB\fBzfs destroy\fR [\fB-rRd\fR] \fIsnapshot\fR\fR +\fBzfs destroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,...] .ad .sp .6 .RS 4n -The given snapshot is destroyed immediately if and only if the \fBzfs destroy\fR command without the \fB-d\fR option would have destroyed it. Such immediate destruction would occur, for example, if the snapshot had no clones and the user-initiated reference count were zero. +The given snapshots are destroyed immediately if and only if the \fBzfs destroy\fR command without the \fB-d\fR option would have destroyed it. Such immediate destruction would occur, for example, if the snapshot had no clones and the user-initiated reference count were zero. +.sp +If a snapshot does not qualify for immediate destruction, it is marked for deferred destruction. In this state, it exists as a usable, visible snapshot until both of the preconditions listed above are met, at which point it is destroyed. +.sp +An inclusive range of snapshots may be specified by separating the +first and last snapshots with a percent sign. +The first and/or last snapshots may be left blank, in which case the +filesystem's oldest or newest snapshot will be implied. .sp -If the snapshot does not qualify for immediate destruction, it is marked for deferred destruction. In this state, it exists as a usable, visible snapshot until both of the preconditions listed above are met, at which point it is destroyed. +Multiple snapshots +(or ranges of snapshots) of the same filesystem or volume may be specified +in a comma-separated list of snapshots. +Only the snapshot's short name (the +part after the \fB@\fR) should be specified when using a range or +comma-separated list to identify multiple snapshots. .sp .ne 2 .mk .na \fB\fB-d\fR\fR .ad .sp .6 .RS 4n Defer snapshot deletion. .RE .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Destroy (or mark for deferred destruction) all snapshots with this name in descendent file systems. .RE .sp .ne 2 .mk .na \fB\fB-R\fR\fR .ad .sp .6 .RS 4n Recursively destroy all dependents. .RE +.sp +.ne 2 +.na +\fB\fB-n\fR\fR +.ad +.sp .6 +.RS 4n +Do a dry-run ("No-op") deletion. No data will be deleted. This is +useful in conjunction with the \fB-v\fR or \fB-p\fR flags to determine what +data would be deleted. +.RE + +.sp +.ne 2 +.na +\fB\fB-p\fR\fR +.ad +.sp .6 +.RS 4n +Print machine-parsable verbose information about the deleted data. +.RE + +.sp +.ne 2 +.na +\fB\fB-v\fR\fR +.ad +.sp .6 +.RS 4n +Print verbose information about the deleted data. +.RE + +.sp +Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR +options, as they can destroy large portions of a pool and cause unexpected +behavior for mounted file systems in use. +.RE + .RE .sp .ne 2 .mk .na \fB\fBzfs snapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR .ad .sp .6 .RS 4n Creates a snapshot with the given name. All previous modifications by successful system calls to the file system are part of the snapshot. See the "Snapshots" section for details. .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively create snapshots of all descendent datasets. Snapshots are taken atomically, so that all recursive snapshots correspond to the same moment in time. .RE .sp .ne 2 .mk .na \fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR .ad .sp .6 .RS 4n Sets the specified property; see \fBzfs create\fR for details. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs rollback\fR [\fB-rRf\fR] \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than the most recent one. In order to do so, all intermediate snapshots must be destroyed by specifying the \fB-r\fR option. .sp The \fB-rR\fR options do not recursively destroy the child snapshots of a recursive snapshot. Only the top-level recursive snapshot is destroyed by either of these options. To completely roll back a recursive snapshot, you must rollback the individual child snapshots. .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively destroy any snapshots more recent than the one specified. .RE .sp .ne 2 .mk .na \fB\fB-R\fR\fR .ad .sp .6 .RS 4n Recursively destroy any more recent snapshots, as well as any clones of those snapshots. .RE .sp .ne 2 .mk .na \fB\fB-f\fR\fR .ad .sp .6 .RS 4n Used with the \fB-R\fR option to force an unmount of any clone file systems that are to be destroyed. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs clone\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Creates a clone of the given snapshot. See the "Clones" section for details. The target dataset can be located anywhere in the \fBZFS\fR hierarchy, and is created as the same type as the original. .sp .ne 2 .mk .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. If the target filesystem or volume already exists, the operation completes successfully. .RE .sp .ne 2 .mk .na \fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR .ad .sp .6 .RS 4n Sets the specified property; see \fBzfs create\fR for details. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs promote\fR \fIclone-filesystem\fR\fR .ad .sp .6 .RS 4n Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the origin file system becomes a clone of the specified file system. .sp The snapshot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the origin file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The \fBrename\fR subcommand can be used to rename any conflicting snapshots. .RE .sp .ne 2 .mk .na \fB\fBzfs rename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR .ad .br .na \fB\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR .ad .br .na \fB\fBzfs rename\fR [\fB-p\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Renames the given dataset. The new target can be located anywhere in the \fBZFS\fR hierarchy, with the exception of snapshots. Snapshots can only be renamed within the parent file system or volume. When renaming a snapshot, the parent file system of the snapshot does not need to be specified as part of the second argument. Renamed file systems can inherit new mount points, in which case they are unmounted and remounted at the new mount point. .sp .ne 2 .mk .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Creates all the nonexistent parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs rename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Recursively rename the snapshots of all descendent datasets. Snapshots are the only dataset that can be renamed recursively. .RE .sp .ne 2 .mk .na \fB\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,\fI\&...\fR]] [ \fB-t\fR \fItype\fR[,\fI\&...\fR]] [ \fB-s\fR \fIproperty\fR ] ... [ \fB-S\fR \fIproperty\fR ] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fIsnap\fR] ...\fR .ad .sp .6 .RS 4n Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all file systems and volumes are displayed. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR) . The following fields are displayed, \fBname,used,available,referenced,mountpoint\fR. .sp .ne 2 .mk .na \fB\fB-H\fR\fR .ad .sp .6 .RS 4n Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary white space. .RE .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively display any children of the dataset on the command line. .RE .sp .ne 2 .mk .na \fB\fB-d\fR \fIdepth\fR\fR .ad .sp .6 .RS 4n Recursively display any children of the dataset, limiting the recursion to \fIdepth\fR. A depth of \fB1\fR will display only the dataset and its direct children. .RE .sp .ne 2 .mk .na \fB\fB-o\fR \fIproperty\fR\fR .ad .sp .6 .RS 4n A comma-separated list of properties to display. The property must be: .RS +4 .TP .ie t \(bu .el o One of the properties described in the "Native Properties" section .RE .RS +4 .TP .ie t \(bu .el o A user property .RE .RS +4 .TP .ie t \(bu .el o The value \fBname\fR to display the dataset name .RE .RS +4 .TP .ie t \(bu .el o The value \fBspace\fR to display space usage properties on file systems and volumes. This is a shortcut for specifying \fB-o name,avail,used,usedsnap,usedds,usedrefreserv,usedchild\fR \fB-t filesystem,volume\fR syntax. .RE .RE .sp .ne 2 .mk .na \fB\fB-s\fR \fIproperty\fR\fR .ad .sp .6 .RS 4n A property for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value \fBname\fR to sort by the dataset name. Multiple properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance. .sp The following is a list of sorting criteria: .RS +4 .TP .ie t \(bu .el o Numeric types sort in numeric order. .RE .RS +4 .TP .ie t \(bu .el o String types sort in alphabetical order. .RE .RS +4 .TP .ie t \(bu .el o Types inappropriate for a row sort that row to the literal bottom, regardless of the specified ordering. .RE .RS +4 .TP .ie t \(bu .el o If no sorting options are specified the existing behavior of \fBzfs list\fR is preserved. .RE .RE .sp .ne 2 .mk .na \fB\fB-S\fR \fIproperty\fR\fR .ad .sp .6 .RS 4n Same as the \fB-s\fR option, but sorts by property in descending order. .RE .sp .ne 2 .mk .na \fB\fB-t\fR \fItype\fR\fR .ad .sp .6 .RS 4n A comma-separated list of types to display, where \fItype\fR is one of \fBfilesystem\fR, \fBsnapshot\fR , \fBvolume\fR, or \fBall\fR. For example, specifying \fB-t snapshot\fR displays only snapshots. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs set\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR .ad .sp .6 .RS 4n Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of \fBB\fR, \fBK\fR, \fBM\fR, \fBG\fR, \fBT\fR, \fBP\fR, \fBE\fR, \fBZ\fR (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). User properties can be set on snapshots. For more information, see the "User Properties" section. .RE .sp .ne 2 .mk .na \fB\fBzfs get\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-Hp\fR] [\fB-o\fR \fIfield\fR[,...] [\fB-s\fR \fIsource\fR[,...] "\fIall\fR" | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR .ad .sp .6 .RS 4n Displays properties for the given datasets. If no datasets are specified, then the command displays properties for all datasets on the system. For each property, the following columns are displayed: .sp .in +2 .nf name Dataset name property Property name value Property value source Property source. Can either be local, default, temporary, inherited, or none (-). .fi .in -2 .sp All columns are displayed by default, though this can be controlled by using the \fB-o\fR option. This command takes a comma-separated list of properties as described in the "Native Properties" and "User Properties" sections. .sp The special value \fBall\fR can be used to display all properties that apply to the given dataset's type (filesystem, volume, or snapshot). .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively display properties for any children. .RE .sp .ne 2 .mk .na \fB\fB-d\fR \fIdepth\fR\fR .ad .sp .6 .RS 4n Recursively display any children of the dataset, limiting the recursion to \fIdepth\fR. A depth of \fB1\fR will display only the dataset and its direct children. .RE .sp .ne 2 .mk .na \fB\fB-H\fR\fR .ad .sp .6 .RS 4n Display output in a form more easily parsed by scripts. Any headers are omitted, and fields are explicitly separated by a single tab instead of an arbitrary amount of space. .RE .sp .ne 2 .mk .na \fB\fB-o\fR \fIfield\fR\fR .ad .sp .6 .RS 4n A comma-separated list of columns to display. \fBname,property,value,source\fR is the default value. .RE .sp .ne 2 .mk .na \fB\fB-s\fR \fIsource\fR\fR .ad .sp .6 .RS 4n A comma-separated list of sources to display. Those properties coming from a source other than those in this list are ignored. Each source must be one of the following: \fBlocal,default,inherited,temporary,none\fR. The default value is all sources. .RE .sp .ne 2 .mk .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Display numbers in parseable (exact) values. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs inherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR .ad .sp .6 .RS 4n Clears the specified property, causing it to be inherited from an ancestor. If no ancestor has the property set, then the default value is used. See the "Properties" section for a listing of default values, and details on which properties can be inherited. .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively inherit the given property for all children. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs upgrade\fR [\fB-v\fR]\fR .ad .sp .6 .RS 4n Displays a list of file systems that are not the most recent version. .RE .sp .ne 2 .mk .na \fB\fBzfs upgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] [\fB-a\fR | \fIfilesystem\fR]\fR .ad .sp .6 .RS 4n Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. \fBzfs send\fR streams generated from new snapshots of these file systems cannot be accessed on systems running older versions of the software. .sp In general, the file system version is independent of the pool version. See \fBzpool\fR(8) for information on the \fBzpool upgrade\fR command. .sp In some cases, the file system version and the pool version are interrelated and the pool version must be upgraded before the file system version can be upgraded. .sp .ne 2 .mk .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Upgrade all file systems on all imported pools. .RE .sp .ne 2 .mk .na \fB\fIfilesystem\fR\fR .ad .sp .6 .RS 4n Upgrade the specified file system. .RE .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Upgrade the specified file system and all descendent file systems .RE .sp .ne 2 .mk .na \fB\fB-V\fR \fIversion\fR\fR .ad .sp .6 .RS 4n Upgrade to the specified \fIversion\fR. If the \fB-V\fR flag is not specified, this command upgrades to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this software. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs userspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR]... [\fB-t\fR \fItype\fR [,...]] \fIfilesystem\fR | \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Displays space consumed by, and quotas on, each user in the specified filesystem or snapshot. This corresponds to the \fBuserused@\fR\fIuser\fR and \fBuserquota@\fR\fIuser\fR properties. .sp .ne 2 .mk .na \fB\fB-n\fR\fR .ad .sp .6 .RS 4n Print numeric ID instead of user/group name. .RE .sp .ne 2 .mk .na \fB\fB-H\fR\fR .ad .sp .6 .RS 4n Do not print headers, use tab-delimited output. .RE .sp .ne 2 .mk .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Use exact (parseable) numeric output. .RE .sp .ne 2 .mk .na \fB\fB-o\fR \fIfield\fR[,...]\fR .ad .sp .6 .RS 4n Display only the specified fields from the following set, \fBtype,name,used,quota\fR.The default is to display all fields. .RE .sp .ne 2 .mk .na \fB\fB-s\fR \fIfield\fR\fR .ad .sp .6 .RS 4n Sort output by this field. The \fIs\fR and \fIS\fR flags may be specified multiple times to sort first by one field, then by another. The default is \fB-s type\fR \fB-s name\fR. .RE .sp .ne 2 .mk .na \fB\fB-S\fR \fIfield\fR\fR .ad .sp .6 .RS 4n Sort by this field in reverse order. See \fB-s\fR. .RE .sp .ne 2 .mk .na \fB\fB-t\fR \fItype\fR[,...]\fR .ad .sp .6 .RS 4n Print only the specified types from the following set, \fBall,posixuser,smbuser,posixgroup,smbgroup\fR. .sp The default is \fB-t posixuser,smbuser\fR .sp The default can be changed to include group types. .RE .sp .ne 2 .mk .na \fB\fB-i\fR\fR .ad .sp .6 .RS 4n Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists. Normal POSIX interfaces (for example, \fBstat\fR(2), \fBls\fR \fB-l\fR) perform this translation, so the \fB-i\fR option allows the output from \fBzfs userspace\fR to be compared directly with those utilities. However, \fB-i\fR may lead to confusion if some files were created by an SMB user before a SMB-to-POSIX name mapping was established. In such a case, some files are owned by the SMB entity and some by the POSIX entity. However, the \fB-i\fR option will report that the POSIX entity has the total usage and quota for both. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs groupspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR]... [\fB-t\fR \fItype\fR [,...]] \fIfilesystem\fR | \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Displays space consumed by, and quotas on, each group in the specified filesystem or snapshot. This subcommand is identical to \fBzfs userspace\fR, except that the default types to display are \fB-t posixgroup,smbgroup\fR. .sp .in +2 .nf - .fi .in -2 .sp .RE .sp .ne 2 .mk .na \fB\fBzfs mount\fR\fR .ad .sp .6 .RS 4n Displays all \fBZFS\fR file systems currently mounted. .RE .sp .ne 2 .mk .na \fB\fBzfs mount\fR [\fB-vO\fR] [\fB-o\fR \fIoptions\fR] \fB-a\fR | \fIfilesystem\fR\fR .ad .sp .6 .RS 4n Mounts \fBZFS\fR file systems. Invoked automatically as part of the boot process. .sp .ne 2 .mk .na \fB\fB-o\fR \fIoptions\fR\fR .ad .sp .6 .RS 4n An optional, comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details. .RE .sp .ne 2 .mk .na \fB\fB-O\fR\fR .ad .sp .6 .RS 4n Perform an overlay mount. See \fBmount\fR(8) for more information. .RE .sp .ne 2 .mk .na \fB\fB-v\fR\fR .ad .sp .6 .RS 4n Report mount progress. .RE .sp .ne 2 .mk .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Mount all available \fBZFS\fR file systems. Invoked automatically as part of the boot process. .RE .sp .ne 2 .mk .na \fB\fIfilesystem\fR\fR .ad .sp .6 .RS 4n Mount the specified filesystem. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs unmount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR\fR .ad .sp .6 .RS 4n Unmounts currently mounted \fBZFS\fR file systems. Invoked automatically as part of the shutdown process. .sp .ne 2 .mk .na \fB\fB-f\fR\fR .ad .sp .6 .RS 4n Forcefully unmount the file system, even if it is currently in use. .RE .sp .ne 2 .mk .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Unmount all available \fBZFS\fR file systems. Invoked automatically as part of the boot process. .RE .sp .ne 2 .mk .na \fB\fIfilesystem\fR|\fImountpoint\fR\fR .ad .sp .6 .RS 4n Unmount the specified filesystem. The command can also be given a path to a \fBZFS\fR file system mount point on the system. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs share\fR \fB-a\fR | \fIfilesystem\fR\fR .ad .sp .6 .RS 4n Shares available \fBZFS\fR file systems. .sp .ne 2 .mk .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Share all available \fBZFS\fR file systems. Invoked automatically as part of the boot process. .RE .sp .ne 2 .mk .na \fB\fIfilesystem\fR\fR .ad .sp .6 .RS 4n Share the specified filesystem according to the \fBsharenfs\fR and \fBsharesmb\fR properties. File systems are shared when the \fBsharenfs\fR or \fBsharesmb\fR property is set. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs unshare\fR \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR\fR .ad .sp .6 .RS 4n Unshares currently shared \fBZFS\fR file systems. This is invoked automatically as part of the shutdown process. .sp .ne 2 .mk .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Unshare all available \fBZFS\fR file systems. Invoked automatically as part of the boot process. .RE .sp .ne 2 .mk .na \fB\fIfilesystem\fR|\fImountpoint\fR\fR .ad .sp .6 .RS 4n Unshare the specified filesystem. The command can also be given a path to a \fBZFS\fR file system shared on the system. .RE .RE .sp .ne 2 .mk .na -\fB\fBzfs send\fR [\fB-vRDp\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR\fR +\fBzfs send\fR [\fB-DnPpRrv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .ad .sp .6 .RS 4n Creates a stream representation of the second \fIsnapshot\fR, which is written to standard output. The output can be redirected to a file or to a different system (for example, using \fBssh\fR(1). By default, a full stream is generated. .sp .ne 2 .mk .na \fB\fB-i\fR \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Generate an incremental stream from the first \fIsnapshot\fR to the second \fIsnapshot\fR. The incremental source (the first \fIsnapshot\fR) can be specified as the last component of the snapshot name (for example, the part after the \fB@\fR), and it is assumed to be from the same file system as the second \fIsnapshot\fR. .sp If the destination is a clone, the source may be the origin snapshot, which must be fully specified (for example, \fBpool/fs@origin\fR, not just \fB@origin\fR). .RE .sp .ne 2 .mk .na \fB\fB-I\fR \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Generate a stream package that sends all intermediary snapshots from the first snapshot to the second snapshot. For example, \fB-I @a fs@d\fR is similar to \fB-i @a fs@b; -i @b fs@c; -i @c fs@d\fR. The incremental source snapshot may be specified as with the \fB-i\fR option. .RE .sp .ne 2 .mk .na \fB\fB-v\fR\fR .ad .sp .6 .RS 4n Print verbose information about the stream package generated. .RE .sp .ne 2 .mk .na \fB\fB-R\fR\fR .ad .sp .6 .RS 4n Generate a replication stream package, which will replicate the specified filesystem, and all descendent file systems, up to the named snapshot. When received, all properties, snapshots, descendent file systems, and clones are preserved. .sp If the \fB-i\fR or \fB-I\fR flags are used in conjunction with the \fB-R\fR flag, an incremental replication stream is generated. The current values of properties, and current snapshot and file system names are set when the stream is received. If the \fB-F\fR flag is specified when this stream is received, snapshots and file systems that do not exist on the sending side are destroyed. .RE .sp .ne 2 .mk .na \fB\fB-D\fR\fR .ad .sp .6 .RS 4n This option will cause dedup processing to be performed on the data being written to a send stream. Dedup processing is optional because it isn't always appropriate (some kinds of data have very little duplication) and it has significant costs: the checksumming required to detect duplicate blocks is CPU-intensive and the data that must be maintained while the stream is being processed can occupy a very large amount of memory. .sp Duplicate blocks are detected by calculating a cryptographically strong checksum on each data block. Blocks that have the same checksum are presumed to be identical. The checksum type used at this time is SHA256. However, the stream format contains a field which identifies the checksum type, permitting other checksums to be used in the future. .RE .sp .ne 2 .mk .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Include properties in the send stream without the -R option. .RE The format of the stream is committed. You will be able to receive your streams on future versions of \fBZFS\fR. .RE .sp .ne 2 .mk .na \fB\fBzfs receive\fR [\fB-vnFu\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR .ad .br .na \fB\fBzfs receive\fR [\fB-vnFu\fR] \fB-d\fR \fIfilesystem\fR\fR .ad .sp .6 .RS 4n Creates a snapshot whose contents are as specified in the stream provided on standard input. If a full stream is received, then a new file system is created as well. Streams are created using the \fBzfs send\fR subcommand, which by default creates a full stream. \fBzfs recv\fR can be used as an alias for \fBzfs receive\fR. .sp If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. For \fBzvols\fR, the destination device link is destroyed and recreated, which means the \fBzvol\fR cannot be accessed during the \fBreceive\fR operation. .sp When a snapshot replication package stream that is generated by using the \fBzfs send\fR \fB-R\fR command is received, any snapshots that do not exist on the sending location are destroyed by using the \fBzfs destroy\fR \fB-d\fR command. .sp The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the \fB-d\fR option. .sp If the argument is a snapshot name, the specified \fIsnapshot\fR is created. If the argument is a file system or volume name, a snapshot with the same name as the sent snapshot is created within the specified \fIfilesystem\fR or \fIvolume\fR. If the \fB-d\fR option is specified, the snapshot name is determined by appending the sent snapshot's name to the specified \fIfilesystem\fR. If the \fB-d\fR option is specified, any required file systems within the specified one are created. .sp .ne 2 .mk .na \fB\fB-d\fR\fR .ad .sp .6 .RS 4n Use the name of the sent snapshot to determine the name of the new snapshot as described in the paragraph above. .RE .sp .ne 2 .mk .na \fB\fB-u\fR\fR .ad .sp .6 .RS 4n File system that is associated with the received stream is not mounted. .RE .sp .ne 2 .mk .na +\fB\fB-D\fR\fR +.ad +.sp .6 +.RS 4n +Generate a deduplicated stream. Blocks which would have been sent multiple +times in the send stream will only be sent once. The receiving system must +also support this feature to recieve a deduplicated stream. This flag can +be used regardless of the dataset's \fBdedup\fR property, but performance +will be much better if the filesystem uses a dedup-capable checksum (eg. +\fBsha256\fR). +.RE + +.sp +.ne 2 +.na +\fB\fB-r\fR\fR +.ad +.sp .6 +.RS 4n +Recursively send all descendant snapshots. This is similar to the \fB-R\fR +flag, but information about deleted and renamed datasets is not included, and +property information is only included if the \fB-p\fR flag is specified. +.RE + +.sp +.ne 2 +.na +\fB\fB-p\fR\fR +.ad +.sp .6 +.RS 4n +Include the dataset's properties in the stream. This flag is implicit when +\fB-R\fR is specified. The receiving system must also support this feature. +.RE + +.sp +.ne 2 +.na +\fB\fB-n\fR\fR +.ad +.sp .6 +.RS 4n +Do a dry-run ("No-op") send. Do not generate any actual send data. This is +useful in conjunction with the \fB-v\fR or \fB-P\fR flags to determine what +data will be sent. +.RE + +.sp +.ne 2 +.na +\fB\fB-P\fR\fR +.ad +.sp .6 +.RS 4n +Print machine-parsable verbose information about the stream package generated. +.RE + +.sp +.ne 2 +.na \fB\fB-v\fR\fR .ad .sp .6 .RS 4n Print verbose information about the stream and the time required to perform the receive operation. .RE .sp .ne 2 .mk .na \fB\fB-n\fR\fR .ad .sp .6 .RS 4n Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to verify the name the receive operation would use. .RE .sp .ne 2 .mk .na \fB\fB-F\fR\fR .ad .sp .6 .RS 4n Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by \fBzfs send -R -[iI]\fR), destroy snapshots and file systems that do not exist on the sending side. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs allow\fR \fIfilesystem\fR | \fIvolume\fR\fR .ad .sp .6 .RS 4n Displays permissions that have been delegated on the specified filesystem or volume. See the other forms of \fBzfs allow\fR for more information. .RE .sp .ne 2 .mk .na \fB\fBzfs allow\fR [\fB-ldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR| \fIvolume\fR\fR .ad .br .na \fB\fBzfs allow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR | \fIvolume\fR\fR .ad .sp .6 .RS 4n Delegates \fBZFS\fR administration permission for the file systems to non-privileged users. .sp .ne 2 .mk .na \fB[\fB-ug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...]\fR .ad .sp .6 .RS 4n Specifies to whom the permissions are delegated. Multiple entities can be specified as a comma-separated list. If neither of the \fB-ug\fR options are specified, then the argument is interpreted preferentially as the keyword "everyone", then as a user name, and lastly as a group name. To specify a user or group named "everyone", use the \fB-u\fR or \fB-g\fR options. To specify a group with the same name as a user, use the \fB-g\fR options. .RE .sp .ne 2 .mk .na \fB[\fB-e\fR] \fIperm\fR|@\fIsetname\fR[,...]\fR .ad .sp .6 .RS 4n Specifies that the permissions be delegated to "everyone." Multiple permissions may be specified as a comma-separated list. Permission names are the same as \fBZFS\fR subcommand and property names. See the property list below. Property set names, which begin with an at sign (\fB@\fR) , may be specified. See the \fB-s\fR form below for details. .RE .sp .ne 2 .mk .na \fB[\fB-ld\fR] \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Specifies where the permissions are delegated. If neither of the \fB-ld\fR options are specified, or both are, then the permissions are allowed for the file system or volume, and all of its descendents. If only the \fB-l\fR option is used, then is allowed "locally" only for the specified file system. If only the \fB-d\fR option is used, then is allowed only for the descendent file systems. .RE .RE .sp .LP Permissions are generally the ability to use a \fBZFS\fR subcommand or change a \fBZFS\fR property. The following permissions are available: .sp .in +2 .nf NAME TYPE NOTES allow subcommand Must also have the permission that is being allowed clone subcommand Must also have the 'create' ability and 'mount' ability in the origin file system create subcommand Must also have the 'mount' ability destroy subcommand Must also have the 'mount' ability mount subcommand Allows mount/umount of ZFS datasets promote subcommand Must also have the 'mount' and 'promote' ability in the origin file system receive subcommand Must also have the 'mount' and 'create' ability rename subcommand Must also have the 'mount' and 'create' ability in the new parent rollback subcommand Must also have the 'mount' ability send subcommand share subcommand Allows sharing file systems over NFS or SMB protocols snapshot subcommand Must also have the 'mount' ability groupquota other Allows accessing any groupquota@... property groupused other Allows reading any groupused@... property userprop other Allows changing any user property userquota other Allows accessing any userquota@... property userused other Allows reading any userused@... property aclinherit property aclmode property atime property canmount property casesensitivity property checksum property compression property copies property devices property exec property mountpoint property nbmand property normalization property primarycache property quota property readonly property recordsize property refquota property refreservation property reservation property secondarycache property setuid property shareiscsi property sharenfs property sharesmb property snapdir property utf8only property version property volblocksize property volsize property vscan property xattr property zoned property .fi .in -2 .sp .sp .ne 2 .mk .na \fB\fBzfs allow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Sets "create time" permissions. These permissions are granted (locally) to the creator of any newly-created descendent file system. .RE .sp .ne 2 .mk .na \fB\fBzfs allow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Defines or adds permissions to a permission set. The set can be used by other \fBzfs allow\fR commands for the specified file system and its descendents. Sets are evaluated dynamically, so changes to a set are immediately reflected. Permission sets follow the same naming restrictions as ZFS file systems, but the name must begin with an "at sign" (\fB@\fR), and can be no more than 64 characters long. .RE .sp .ne 2 .mk .na \fB\fBzfs unallow\fR [\fB-rldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] [\fIperm\fR|@\fIsetname\fR[, ...]] \fIfilesystem\fR|\fIvolume\fR\fR .ad .br .na \fB\fBzfs unallow\fR [\fB-rld\fR] \fB-e\fR [\fIperm\fR|@\fIsetname\fR [,...]] \fIfilesystem\fR|\fIvolume\fR\fR .ad .br .na \fB\fBzfs unallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[,...]]\fR .ad .br .na \fB\fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Removes permissions that were granted with the \fBzfs allow\fR command. No permissions are explicitly denied, so other permissions granted are still in effect. For example, if the permission is granted by an ancestor. If no permissions are specified, then all permissions for the specified \fIuser\fR, \fIgroup\fR, or \fIeveryone\fR are removed. Specifying "everyone" (or using the \fB-e\fR option) only removes the permissions that were granted to "everyone", not all permissions for every user and group. See the \fBzfs allow\fR command for a description of the \fB-ldugec\fR options. .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively remove the permissions from this file system and all descendents. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs unallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,...]]\fR .ad .br .na \fB\fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Removes permissions from a permission set. If no permissions are specified, then all permissions are removed, thus removing the set entirely. .RE .sp .ne 2 .mk .na \fB\fBzfs hold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Adds a single reference, named with the \fItag\fR argument, to the specified snapshot or snapshots. Each snapshot has its own tag namespace, and tags must be unique within that space. .sp If a hold exists on a snapshot, attempts to destroy that snapshot by using the \fBzfs destroy\fR command return \fBEBUSY\fR. .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Specifies that a hold with the given tag is applied recursively to the snapshots of all descendent file systems. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs holds\fR [\fB-r\fR] \fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Lists all existing user references for the given snapshot or snapshots. .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Lists the holds that are set on the named descendent snapshots, in addition to listing the holds on the named snapshot. .RE .RE .sp .ne 2 .mk .na \fB\fBzfs release\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Removes a single reference, named with the \fItag\fR argument, from the specified snapshot or snapshots. The tag must already exist for each snapshot. .sp If a hold exists on a snapshot, attempts to destroy that snapshot by using the \fBzfs destroy\fR command return \fBEBUSY\fR. .sp .ne 2 .mk .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively releases a hold with the given tag on the snapshots of all descendent file systems. .RE .RE .SH EXAMPLES .LP \fBExample 1 \fRCreating a ZFS File System Hierarchy .sp .LP The following commands create a file system named \fBpool/home\fR and a file system named \fBpool/home/bob\fR. The mount point \fB/export/home\fR is set for the parent file system, and is automatically inherited by the child file system. .sp .in +2 .nf # \fBzfs create pool/home\fR # \fBzfs set mountpoint=/export/home pool/home\fR # \fBzfs create pool/home/bob\fR .fi .in -2 .sp .LP \fBExample 2 \fRCreating a ZFS Snapshot .sp .LP The following command creates a snapshot named \fByesterday\fR. This snapshot is mounted on demand in the \fB\&.zfs/snapshot\fR directory at the root of the \fBpool/home/bob\fR file system. .sp .in +2 .nf # \fBzfs snapshot pool/home/bob@yesterday\fR .fi .in -2 .sp .LP \fBExample 3 \fRCreating and Destroying Multiple Snapshots .sp .LP The following command creates snapshots named \fByesterday\fR of \fBpool/home\fR and all of its descendent file systems. Each snapshot is mounted on demand in the \fB\&.zfs/snapshot\fR directory at the root of its file system. The second command destroys the newly created snapshots. .sp .in +2 .nf # \fBzfs snapshot -r pool/home@yesterday\fR # \fBzfs destroy -r pool/home@yesterday\fR .fi .in -2 .sp .LP \fBExample 4 \fRDisabling and Enabling File System Compression .sp .LP The following command disables the \fBcompression\fR property for all file systems under \fBpool/home\fR. The next command explicitly enables \fBcompression\fR for \fBpool/home/anne\fR. .sp .in +2 .nf # \fBzfs set compression=off pool/home\fR # \fBzfs set compression=on pool/home/anne\fR .fi .in -2 .sp .LP \fBExample 5 \fRListing ZFS Datasets .sp .LP The following command lists all active file systems and volumes in the system. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR. The default is \fBoff\fR. See \fBzpool\fR(8) for more information on pool properties. .sp .in +2 .nf # \fBzfs list\fR NAME USED AVAIL REFER MOUNTPOINT pool 450K 457G 18K /pool pool/home 315K 457G 21K /export/home pool/home/anne 18K 457G 18K /export/home/anne pool/home/bob 276K 457G 276K /export/home/bob .fi .in -2 .sp .LP \fBExample 6 \fRSetting a Quota on a ZFS File System .sp .LP The following command sets a quota of 50 Gbytes for \fBpool/home/bob\fR. .sp .in +2 .nf # \fBzfs set quota=50G pool/home/bob\fR .fi .in -2 .sp .LP \fBExample 7 \fRListing ZFS Properties .sp .LP The following command lists all properties for \fBpool/home/bob\fR. .sp .in +2 .nf # \fBzfs get all pool/home/bob\fR NAME PROPERTY VALUE SOURCE pool/home/bob type filesystem - pool/home/bob creation Tue Jul 21 15:53 2009 - pool/home/bob used 21K - pool/home/bob available 20.0G - pool/home/bob referenced 21K - pool/home/bob compressratio 1.00x - pool/home/bob mounted yes - pool/home/bob quota 20G local pool/home/bob reservation none default pool/home/bob recordsize 128K default pool/home/bob mountpoint /pool/home/bob default pool/home/bob sharenfs off default pool/home/bob checksum on default pool/home/bob compression on local pool/home/bob atime on default pool/home/bob devices on default pool/home/bob exec on default pool/home/bob setuid on default pool/home/bob readonly off default pool/home/bob zoned off default pool/home/bob snapdir hidden default pool/home/bob aclmode groupmask default pool/home/bob aclinherit restricted default pool/home/bob canmount on default pool/home/bob shareiscsi off default pool/home/bob xattr on default pool/home/bob copies 1 default pool/home/bob version 4 - pool/home/bob utf8only off - pool/home/bob normalization none - pool/home/bob casesensitivity sensitive - pool/home/bob vscan off default pool/home/bob nbmand off default pool/home/bob sharesmb off default pool/home/bob refquota none default pool/home/bob refreservation none default pool/home/bob primarycache all default pool/home/bob secondarycache all default pool/home/bob usedbysnapshots 0 - pool/home/bob usedbydataset 21K - pool/home/bob usedbychildren 0 - pool/home/bob usedbyrefreservation 0 - .fi .in -2 .sp .sp .LP The following command gets a single property value. .sp .in +2 .nf # \fBzfs get -H -o value compression pool/home/bob\fR on .fi .in -2 .sp .sp .LP The following command lists all properties with local settings for \fBpool/home/bob\fR. .sp .in +2 .nf # \fBzfs get -r -s local -o name,property,value all pool/home/bob\fR NAME PROPERTY VALUE pool/home/bob quota 20G pool/home/bob compression on .fi .in -2 .sp .LP \fBExample 8 \fRRolling Back a ZFS File System .sp .LP The following command reverts the contents of \fBpool/home/anne\fR to the snapshot named \fByesterday\fR, deleting all intermediate snapshots. .sp .in +2 .nf # \fBzfs rollback -r pool/home/anne@yesterday\fR .fi .in -2 .sp .LP \fBExample 9 \fRCreating a ZFS Clone .sp .LP The following command creates a writable file system whose initial contents are the same as \fBpool/home/bob@yesterday\fR. .sp .in +2 .nf # \fBzfs clone pool/home/bob@yesterday pool/clone\fR .fi .in -2 .sp .LP \fBExample 10 \fRPromoting a ZFS Clone .sp .LP The following commands illustrate how to test out changes to a file system, and then replace the original file system with the changed one, using clones, clone promotion, and renaming: .sp .in +2 .nf # \fBzfs create pool/project/production\fR populate /pool/project/production with data # \fBzfs snapshot pool/project/production@today\fR # \fBzfs clone pool/project/production@today pool/project/beta\fR make changes to /pool/project/beta and test them # \fBzfs promote pool/project/beta\fR # \fBzfs rename pool/project/production pool/project/legacy\fR # \fBzfs rename pool/project/beta pool/project/production\fR once the legacy version is no longer needed, it can be destroyed # \fBzfs destroy pool/project/legacy\fR .fi .in -2 .sp .LP \fBExample 11 \fRInheriting ZFS Properties .sp .LP The following command causes \fBpool/home/bob\fR and \fBpool/home/anne\fR to inherit the \fBchecksum\fR property from their parent. .sp .in +2 .nf # \fBzfs inherit checksum pool/home/bob pool/home/anne\fR .fi .in -2 .sp .LP \fBExample 12 \fRRemotely Replicating ZFS Data .sp .LP The following commands send a full stream and then an incremental stream to a remote machine, restoring them into \fBpoolB/received/fs@a\fRand \fBpoolB/received/fs@b\fR, respectively. \fBpoolB\fR must contain the file system \fBpoolB/received\fR, and must not initially contain \fBpoolB/received/fs\fR. .sp .in +2 .nf # \fBzfs send pool/fs@a | \e\fR \fBssh host zfs receive poolB/received/fs@a\fR # \fBzfs send -i a pool/fs@b | ssh host \e\fR \fBzfs receive poolB/received/fs\fR .fi .in -2 .sp .LP \fBExample 13 \fRUsing the \fBzfs receive\fR \fB-d\fR Option .sp .LP The following command sends a full stream of \fBpoolA/fsA/fsB@snap\fR to a remote machine, receiving it into \fBpoolB/received/fsA/fsB@snap\fR. The \fBfsA/fsB@snap\fR portion of the received snapshot's name is determined from the name of the sent snapshot. \fBpoolB\fR must contain the file system \fBpoolB/received\fR. If \fBpoolB/received/fsA\fR does not exist, it is created as an empty file system. .sp .in +2 .nf # \fBzfs send poolA/fsA/fsB@snap | \e ssh host zfs receive -d poolB/received\fR .fi .in -2 .sp .LP \fBExample 14 \fRSetting User Properties .sp .LP The following example sets the user-defined \fBcom.example:department\fR property for a dataset. .sp .in +2 .nf # \fBzfs set com.example:department=12345 tank/accounting\fR .fi .in -2 .sp .LP \fBExample 15 \fRCreating a ZFS Volume as an iSCSI Target Device .sp .LP The following example shows how to create a \fBZFS\fR volume as an \fBiSCSI\fR target. .sp .in +2 .nf # \fBzfs create -V 2g pool/volumes/vol1\fR # \fBzfs set shareiscsi=on pool/volumes/vol1\fR # \fBiscsitadm list target\fR Target: pool/volumes/vol1 iSCSI Name: iqn.1986-03.com.sun:02:7b4b02a6-3277-eb1b-e686-a24762c52a8c Connections: 0 .fi .in -2 .sp .sp .LP After the \fBiSCSI\fR target is created, set up the \fBiSCSI\fR initiator. For more information about the Solaris \fBiSCSI\fR initiator, see \fBiscsitadm\fR(1M). .LP \fBExample 16 \fRPerforming a Rolling Snapshot .sp .LP The following example shows how to maintain a history of snapshots with a consistent naming scheme. To keep a week's worth of snapshots, the user destroys the oldest snapshot, renames the remaining snapshots, and then creates a new snapshot, as follows: .sp .in +2 .nf # \fBzfs destroy -r pool/users@7daysago\fR # \fBzfs rename -r pool/users@6daysago @7daysago\fR # \fBzfs rename -r pool/users@5daysago @6daysago\fR # \fBzfs rename -r pool/users@yesterday @5daysago\fR # \fBzfs rename -r pool/users@yesterday @4daysago\fR # \fBzfs rename -r pool/users@yesterday @3daysago\fR # \fBzfs rename -r pool/users@yesterday @2daysago\fR # \fBzfs rename -r pool/users@today @yesterday\fR # \fBzfs snapshot -r pool/users@today\fR .fi .in -2 .sp .LP \fBExample 17 \fRSetting \fBsharenfs\fR Property Options on a ZFS File System .sp .LP The following commands show how to set \fBsharenfs\fR property options to enable \fBrw\fR access for a set of \fBIP\fR addresses and to enable root access for system \fBneo\fR on the \fBtank/home\fR file system. .sp .in +2 .nf # \fB# zfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home\fR .fi .in -2 .sp .sp .LP If you are using \fBDNS\fR for host name resolution, specify the fully qualified hostname. .LP \fBExample 18 \fRDelegating ZFS Administration Permissions on a ZFS Dataset .sp .LP The following example shows how to set permissions so that user \fBcindys\fR can create, destroy, mount, and take snapshots on \fBtank/cindys\fR. The permissions on \fBtank/cindys\fR are also displayed. .sp .in +2 .nf # \fBzfs allow cindys create,destroy,mount,snapshot tank/cindys\fR # \fBzfs allow tank/cindys\fR ------------------------------------------------------------- Local+Descendent permissions on (tank/cindys) user cindys create,destroy,mount,snapshot ------------------------------------------------------------- .fi .in -2 .sp .sp .LP Because the \fBtank/cindys\fR mount point permission is set to 755 by default, user \fBcindys\fR will be unable to mount file systems under \fBtank/cindys\fR. Set an \fBACL\fR similar to the following syntax to provide mount point access: .sp .in +2 .nf # \fBchmod A+user:cindys:add_subdirectory:allow /tank/cindys\fR .fi .in -2 .sp .LP \fBExample 19 \fRDelegating Create Time Permissions on a ZFS Dataset .sp .LP The following example shows how to grant anyone in the group \fBstaff\fR to create file systems in \fBtank/users\fR. This syntax also allows staff members to destroy their own file systems, but not destroy anyone else's file system. The permissions on \fBtank/users\fR are also displayed. .sp .in +2 .nf # \fB# zfs allow staff create,mount tank/users\fR # \fBzfs allow -c destroy tank/users\fR # \fBzfs allow tank/users\fR ------------------------------------------------------------- Create time permissions on (tank/users) create,destroy Local+Descendent permissions on (tank/users) group staff create,mount ------------------------------------------------------------- .fi .in -2 .sp .LP \fBExample 20 \fRDefining and Granting a Permission Set on a ZFS Dataset .sp .LP The following example shows how to define and grant a permission set on the \fBtank/users\fR file system. The permissions on \fBtank/users\fR are also displayed. .sp .in +2 .nf # \fBzfs allow -s @pset create,destroy,snapshot,mount tank/users\fR # \fBzfs allow staff @pset tank/users\fR # \fBzfs allow tank/users\fR ------------------------------------------------------------- Permission sets on (tank/users) @pset create,destroy,mount,snapshot Create time permissions on (tank/users) create,destroy Local+Descendent permissions on (tank/users) group staff @pset,create,mount ------------------------------------------------------------- .fi .in -2 .sp .LP \fBExample 21 \fRDelegating Property Permissions on a ZFS Dataset .sp .LP The following example shows to grant the ability to set quotas and reservations on the \fBusers/home\fR file system. The permissions on \fBusers/home\fR are also displayed. .sp .in +2 .nf # \fBzfs allow cindys quota,reservation users/home\fR # \fBzfs allow users/home\fR ------------------------------------------------------------- Local+Descendent permissions on (users/home) user cindys quota,reservation ------------------------------------------------------------- cindys% \fBzfs set quota=10G users/home/marks\fR cindys% \fBzfs get quota users/home/marks\fR NAME PROPERTY VALUE SOURCE users/home/marks quota 10G local .fi .in -2 .sp .LP \fBExample 22 \fRRemoving ZFS Delegated Permissions on a ZFS Dataset .sp .LP The following example shows how to remove the snapshot permission from the \fBstaff\fR group on the \fBtank/users\fR file system. The permissions on \fBtank/users\fR are also displayed. .sp .in +2 .nf # \fBzfs unallow staff snapshot tank/users\fR # \fBzfs allow tank/users\fR ------------------------------------------------------------- Permission sets on (tank/users) @pset create,destroy,mount,snapshot Create time permissions on (tank/users) create,destroy Local+Descendent permissions on (tank/users) group staff @pset,create,mount ------------------------------------------------------------- .fi .in -2 .sp .SH EXIT STATUS .sp .LP The following exit values are returned: .sp .ne 2 .mk .na \fB\fB0\fR\fR .ad .sp .6 .RS 4n Successful completion. .RE .sp .ne 2 .mk .na \fB\fB1\fR\fR .ad .sp .6 .RS 4n An error occurred. .RE .sp .ne 2 .mk .na \fB\fB2\fR\fR .ad .sp .6 .RS 4n Invalid command line options were specified. .RE .SH SEE ALSO .sp .LP \fBchmod\fR(2), \fBfsync\fR(2), \fBgzip\fR(1), \fBmount\fR(8), \fBssh\fR(1), \fBstat\fR(2), \fBwrite\fR(2), \fBzpool\fR(8) diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 9afe3d900850..0a0c25bd4175 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -1,641 +1,657 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include "zfs_prop.h" #include "zfs_deleg.h" #if defined(_KERNEL) #include #else #include #include #include #endif static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS]; /* Note this is indexed by zfs_userquota_prop_t, keep the order the same */ const char *zfs_userquota_prop_prefixes[] = { "userused@", "userquota@", "groupused@", "groupquota@" }; zprop_desc_t * zfs_prop_get_table(void) { return (zfs_prop_table); } void zfs_prop_init(void) { static zprop_index_t checksum_table[] = { { "on", ZIO_CHECKSUM_ON }, { "off", ZIO_CHECKSUM_OFF }, { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, { "sha256", ZIO_CHECKSUM_SHA256 }, { NULL } }; static zprop_index_t dedup_table[] = { { "on", ZIO_CHECKSUM_ON }, { "off", ZIO_CHECKSUM_OFF }, { "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY }, { "sha256", ZIO_CHECKSUM_SHA256 }, { "sha256,verify", ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY }, { NULL } }; static zprop_index_t compress_table[] = { { "on", ZIO_COMPRESS_ON }, { "off", ZIO_COMPRESS_OFF }, { "lzjb", ZIO_COMPRESS_LZJB }, { "gzip", ZIO_COMPRESS_GZIP_6 }, /* gzip default */ { "gzip-1", ZIO_COMPRESS_GZIP_1 }, { "gzip-2", ZIO_COMPRESS_GZIP_2 }, { "gzip-3", ZIO_COMPRESS_GZIP_3 }, { "gzip-4", ZIO_COMPRESS_GZIP_4 }, { "gzip-5", ZIO_COMPRESS_GZIP_5 }, { "gzip-6", ZIO_COMPRESS_GZIP_6 }, { "gzip-7", ZIO_COMPRESS_GZIP_7 }, { "gzip-8", ZIO_COMPRESS_GZIP_8 }, { "gzip-9", ZIO_COMPRESS_GZIP_9 }, { "zle", ZIO_COMPRESS_ZLE }, { NULL } }; static zprop_index_t snapdir_table[] = { { "hidden", ZFS_SNAPDIR_HIDDEN }, { "visible", ZFS_SNAPDIR_VISIBLE }, { NULL } }; static zprop_index_t acl_inherit_table[] = { { "discard", ZFS_ACL_DISCARD }, { "noallow", ZFS_ACL_NOALLOW }, { "restricted", ZFS_ACL_RESTRICTED }, { "passthrough", ZFS_ACL_PASSTHROUGH }, { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatability */ { "passthrough-x", ZFS_ACL_PASSTHROUGH_X }, { NULL } }; static zprop_index_t case_table[] = { { "sensitive", ZFS_CASE_SENSITIVE }, { "insensitive", ZFS_CASE_INSENSITIVE }, { "mixed", ZFS_CASE_MIXED }, { NULL } }; static zprop_index_t copies_table[] = { { "1", 1 }, { "2", 2 }, { "3", 3 }, { NULL } }; /* * Use the unique flags we have to send to u8_strcmp() and/or * u8_textprep() to represent the various normalization property * values. */ static zprop_index_t normalize_table[] = { { "none", 0 }, { "formD", U8_TEXTPREP_NFD }, { "formKC", U8_TEXTPREP_NFKC }, { "formC", U8_TEXTPREP_NFC }, { "formKD", U8_TEXTPREP_NFKD }, { NULL } }; static zprop_index_t version_table[] = { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 }, { "5", 5 }, { "current", ZPL_VERSION }, { NULL } }; static zprop_index_t boolean_table[] = { { "off", 0 }, { "on", 1 }, { NULL } }; static zprop_index_t logbias_table[] = { { "latency", ZFS_LOGBIAS_LATENCY }, { "throughput", ZFS_LOGBIAS_THROUGHPUT }, { NULL } }; static zprop_index_t canmount_table[] = { { "off", ZFS_CANMOUNT_OFF }, { "on", ZFS_CANMOUNT_ON }, { "noauto", ZFS_CANMOUNT_NOAUTO }, { NULL } }; static zprop_index_t cache_table[] = { { "none", ZFS_CACHE_NONE }, { "metadata", ZFS_CACHE_METADATA }, { "all", ZFS_CACHE_ALL }, { NULL } }; static zprop_index_t sync_table[] = { { "standard", ZFS_SYNC_STANDARD }, { "always", ZFS_SYNC_ALWAYS }, { "disabled", ZFS_SYNC_DISABLED }, { NULL } }; static zprop_index_t xattr_table[] = { { "off", ZFS_XATTR_OFF }, { "on", ZFS_XATTR_DIR }, { "sa", ZFS_XATTR_SA }, { "dir", ZFS_XATTR_DIR }, { NULL } }; /* inherit index properties */ zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "standard | always | disabled", "SYNC", sync_table); zprop_register_index(ZFS_PROP_CHECKSUM, "checksum", ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM", checksum_table); zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | verify | sha256[,verify]", "DEDUP", dedup_table); zprop_register_index(ZFS_PROP_COMPRESSION, "compression", ZIO_COMPRESS_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS", compress_table); zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "hidden | visible", "SNAPDIR", snapdir_table); zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "discard | noallow | restricted | passthrough | passthrough-x", "ACLINHERIT", acl_inherit_table); zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "1 | 2 | 3", "COPIES", copies_table); zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache", ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "PRIMARYCACHE", cache_table); zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache", ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "SECONDARYCACHE", cache_table); zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "latency | throughput", "LOGBIAS", logbias_table); zprop_register_index(ZFS_PROP_XATTR, "xattr", ZFS_XATTR_DIR, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off | dir | sa", "XATTR", xattr_table); /* inherit index (boolean) properties */ zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table); zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES", boolean_table); zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC", boolean_table); zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID", boolean_table); zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY", boolean_table); zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table); zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", boolean_table); zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND", boolean_table); /* default index properties */ zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, - "1 | 2 | 3 | 4 | current", "VERSION", version_table); + "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table); zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", "CANMOUNT", canmount_table); /* readonly index (boolean) properties */ zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", boolean_table); /* set once index properties */ zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "none | formC | formD | formKC | formKD", "NORMALIZATION", normalize_table); zprop_register_index(ZFS_PROP_CASE, "casesensitivity", ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "sensitive | insensitive | mixed", "CASE", case_table); /* set once index (boolean) properties */ zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "UTF8ONLY", boolean_table); /* string properties */ zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "ORIGIN"); + zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY, + ZFS_TYPE_SNAPSHOT, "[,...]", "CLONES"); zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, " | legacy | none", "MOUNTPOINT"); zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options", "SHARENFS"); zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY, ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE"); zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | sharemgr(1M) options", "SHARESMB"); zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel", ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET, "", "MLSLABEL"); /* readonly number properties */ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "USED"); zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "AVAIL"); zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "REFER"); zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, PROP_READONLY, ZFS_TYPE_DATASET, "<1.00x or higher if compressed>", "RATIO"); zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0, PROP_READONLY, ZFS_TYPE_DATASET, "<1.00x or higher if compressed>", "REFRATIO"); zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDSNAP"); zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDDS"); zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDCHILD"); zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDREFRESERV"); zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "", "USERREFS"); + zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY, + ZFS_TYPE_DATASET, "", "WRITTEN"); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, " | none", "QUOTA"); zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, " | none", "RESERV"); zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT, ZFS_TYPE_VOLUME, "", "VOLSIZE"); zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, " | none", "REFQUOTA"); zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, " | none", "REFRESERV"); /* inherit number properties */ zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize", SPA_MAXBLOCKSIZE, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE"); /* hidden properties */ zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG"); zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES"); zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_DATASET, "NAME"); zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS"); zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu", PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "STMF_SBD_LU"); zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "GUID"); zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "USERACCOUNTING"); zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE"); zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID"); /* * Property to be removed once libbe is integrated */ zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "PRIV_PROP"); /* oddball properties */ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL, PROP_READONLY, ZFS_TYPE_DATASET, "", "CREATION", B_FALSE, B_TRUE, NULL); } boolean_t zfs_prop_delegatable(zfs_prop_t prop) { zprop_desc_t *pd = &zfs_prop_table[prop]; /* The mlslabel property is never delegatable. */ if (prop == ZFS_PROP_MLSLABEL) return (B_FALSE); return (pd->pd_attr != PROP_READONLY); } /* * Given a zfs dataset property name, returns the corresponding property ID. */ zfs_prop_t zfs_name_to_prop(const char *propname) { return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET)); } /* * For user property names, we allow all lowercase alphanumeric characters, plus * a few useful punctuation characters. */ static int valid_char(char c) { return ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.' || c == ':'); } /* * Returns true if this is a valid user-defined property (one with a ':'). */ boolean_t zfs_prop_user(const char *name) { int i; char c; boolean_t foundsep = B_FALSE; for (i = 0; i < strlen(name); i++) { c = name[i]; if (!valid_char(c)) return (B_FALSE); if (c == ':') foundsep = B_TRUE; } if (!foundsep) return (B_FALSE); return (B_TRUE); } /* * Returns true if this is a valid userspace-type property (one with a '@'). * Note that after the @, any character is valid (eg, another @, for SID * user@domain). */ boolean_t zfs_prop_userquota(const char *name) { zfs_userquota_prop_t prop; for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) { if (strncmp(name, zfs_userquota_prop_prefixes[prop], strlen(zfs_userquota_prop_prefixes[prop])) == 0) { return (B_TRUE); } } return (B_FALSE); } +/* + * Returns true if this is a valid written@ property. + * Note that after the @, any character is valid (eg, another @, for + * written@pool/fs@origin). + */ +boolean_t +zfs_prop_written(const char *name) +{ + static const char *prefix = "written@"; + return (strncmp(name, prefix, strlen(prefix)) == 0); +} + /* * Tables of index types, plus functions to convert between the user view * (strings) and internal representation (uint64_t). */ int zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index) { return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET)); } int zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string) { return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET)); } uint64_t zfs_prop_random_value(zfs_prop_t prop, uint64_t seed) { return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET)); } /* * Returns TRUE if the property applies to any of the given dataset types. */ boolean_t zfs_prop_valid_for_type(int prop, zfs_type_t types) { return (zprop_valid_for_type(prop, types)); } zprop_type_t zfs_prop_get_type(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_proptype); } /* * Returns TRUE if the property is readonly. */ boolean_t zfs_prop_readonly(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_READONLY || zfs_prop_table[prop].pd_attr == PROP_ONETIME); } /* * Returns TRUE if the property is only allowed to be set once. */ boolean_t zfs_prop_setonce(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); } const char * zfs_prop_default_string(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_strdefault); } uint64_t zfs_prop_default_numeric(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_numdefault); } /* * Given a dataset property ID, returns the corresponding name. * Assuming the zfs dataset property ID is valid. */ const char * zfs_prop_to_name(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_name); } /* * Returns TRUE if the property is inheritable. */ boolean_t zfs_prop_inheritable(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_INHERIT || zfs_prop_table[prop].pd_attr == PROP_ONETIME); } #ifndef _KERNEL /* * Returns a string describing the set of acceptable values for the given * zfs property, or NULL if it cannot be set. */ const char * zfs_prop_values(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_values); } /* * Returns TRUE if this property is a string type. Note that index types * (compression, checksum) are treated as strings in userland, even though they * are stored numerically on disk. */ int zfs_prop_is_string(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING || zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX); } /* * Returns the column header for the given property. Used only in * 'zfs list -o', but centralized here with the other property information. */ const char * zfs_prop_column_name(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_colname); } /* * Returns whether the given property should be displayed right-justified for * 'zfs list'. */ boolean_t zfs_prop_align_right(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_rightalign); } #endif #if defined(_KERNEL) && defined(HAVE_SPL) static int zcommon_init(void) { return 0; } static int zcommon_fini(void) { return 0; } spl_module_init(zcommon_init); spl_module_exit(zcommon_fini); MODULE_DESCRIPTION("Generic ZFS support"); MODULE_AUTHOR(ZFS_META_AUTHOR); MODULE_LICENSE(ZFS_META_LICENSE); /* zfs dataset property functions */ EXPORT_SYMBOL(zfs_userquota_prop_prefixes); EXPORT_SYMBOL(zfs_prop_init); EXPORT_SYMBOL(zfs_prop_get_type); EXPORT_SYMBOL(zfs_prop_get_table); EXPORT_SYMBOL(zfs_prop_delegatable); /* Dataset property functions shared between libzfs and kernel. */ EXPORT_SYMBOL(zfs_prop_default_string); EXPORT_SYMBOL(zfs_prop_default_numeric); EXPORT_SYMBOL(zfs_prop_readonly); EXPORT_SYMBOL(zfs_prop_inheritable); EXPORT_SYMBOL(zfs_prop_setonce); EXPORT_SYMBOL(zfs_prop_to_name); EXPORT_SYMBOL(zfs_name_to_prop); EXPORT_SYMBOL(zfs_prop_user); EXPORT_SYMBOL(zfs_prop_userquota); EXPORT_SYMBOL(zfs_prop_index_to_string); EXPORT_SYMBOL(zfs_prop_string_to_index); EXPORT_SYMBOL(zfs_prop_valid_for_type); #endif diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c index 72be31235607..022921c666b8 100644 --- a/module/zfs/bpobj.c +++ b/module/zfs/bpobj.c @@ -1,495 +1,500 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include +#include uint64_t bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) { int size; if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT) size = BPOBJ_SIZE_V0; else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) size = BPOBJ_SIZE_V1; else size = sizeof (bpobj_phys_t); return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize, DMU_OT_BPOBJ_HDR, size, tx)); } void bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) { int64_t i; bpobj_t bpo; dmu_object_info_t doi; int epb; dmu_buf_t *dbuf = NULL; VERIFY3U(0, ==, bpobj_open(&bpo, os, obj)); mutex_enter(&bpo.bpo_lock); if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0) goto out; VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi)); epb = doi.doi_data_block_size / sizeof (uint64_t); for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { uint64_t *objarray; uint64_t offset, blkoff; offset = i * sizeof (uint64_t); blkoff = P2PHASE(i, epb); if (dbuf == NULL || dbuf->db_offset > offset) { if (dbuf) dmu_buf_rele(dbuf, FTAG); VERIFY3U(0, ==, dmu_buf_hold(os, bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0)); } ASSERT3U(offset, >=, dbuf->db_offset); ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); objarray = dbuf->db_data; bpobj_free(os, objarray[blkoff], tx); } if (dbuf) { dmu_buf_rele(dbuf, FTAG); dbuf = NULL; } VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx)); out: mutex_exit(&bpo.bpo_lock); bpobj_close(&bpo); VERIFY3U(0, ==, dmu_object_free(os, obj, tx)); } int bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object) { dmu_object_info_t doi; int err; err = dmu_object_info(os, object, &doi); if (err) return (err); bzero(bpo, sizeof (*bpo)); mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL); ASSERT(bpo->bpo_dbuf == NULL); ASSERT(bpo->bpo_phys == NULL); ASSERT(object != 0); ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ); ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR); err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf); if (err) return (err); bpo->bpo_os = os; bpo->bpo_object = object; bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT; bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0); bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1); bpo->bpo_phys = bpo->bpo_dbuf->db_data; return (0); } void bpobj_close(bpobj_t *bpo) { /* Lame workaround for closing a bpobj that was never opened. */ if (bpo->bpo_object == 0) return; dmu_buf_rele(bpo->bpo_dbuf, bpo); if (bpo->bpo_cached_dbuf != NULL) dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); bpo->bpo_dbuf = NULL; bpo->bpo_phys = NULL; bpo->bpo_cached_dbuf = NULL; bpo->bpo_object = 0; mutex_destroy(&bpo->bpo_lock); } static int bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, boolean_t free) { dmu_object_info_t doi; int epb; int64_t i; int err = 0; dmu_buf_t *dbuf = NULL; mutex_enter(&bpo->bpo_lock); if (free) dmu_buf_will_dirty(bpo->bpo_dbuf, tx); for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) { blkptr_t *bparray; blkptr_t *bp; uint64_t offset, blkoff; offset = i * sizeof (blkptr_t); blkoff = P2PHASE(i, bpo->bpo_epb); if (dbuf == NULL || dbuf->db_offset > offset) { if (dbuf) dmu_buf_rele(dbuf, FTAG); err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset, FTAG, &dbuf, 0); if (err) break; } ASSERT3U(offset, >=, dbuf->db_offset); ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); bparray = dbuf->db_data; bp = &bparray[blkoff]; err = func(arg, bp, tx); if (err) break; if (free) { bpo->bpo_phys->bpo_bytes -= bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); if (bpo->bpo_havecomp) { bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp); bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp); } bpo->bpo_phys->bpo_num_blkptrs--; ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0); } } if (dbuf) { dmu_buf_rele(dbuf, FTAG); dbuf = NULL; } if (free) { i++; VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object, i * sizeof (blkptr_t), -1ULL, tx)); } if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0) goto out; ASSERT(bpo->bpo_havecomp); err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi); if (err) { mutex_exit(&bpo->bpo_lock); return (err); } epb = doi.doi_data_block_size / sizeof (uint64_t); for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { uint64_t *objarray; uint64_t offset, blkoff; bpobj_t sublist; uint64_t used_before, comp_before, uncomp_before; uint64_t used_after, comp_after, uncomp_after; offset = i * sizeof (uint64_t); blkoff = P2PHASE(i, epb); if (dbuf == NULL || dbuf->db_offset > offset) { if (dbuf) dmu_buf_rele(dbuf, FTAG); err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0); if (err) break; } ASSERT3U(offset, >=, dbuf->db_offset); ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); objarray = dbuf->db_data; err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]); if (err) break; if (free) { err = bpobj_space(&sublist, &used_before, &comp_before, &uncomp_before); if (err) break; } err = bpobj_iterate_impl(&sublist, func, arg, tx, free); if (free) { VERIFY3U(0, ==, bpobj_space(&sublist, &used_after, &comp_after, &uncomp_after)); bpo->bpo_phys->bpo_bytes -= used_before - used_after; ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); bpo->bpo_phys->bpo_comp -= comp_before - comp_after; bpo->bpo_phys->bpo_uncomp -= uncomp_before - uncomp_after; } bpobj_close(&sublist); if (err) break; if (free) { err = dmu_object_free(bpo->bpo_os, objarray[blkoff], tx); if (err) break; bpo->bpo_phys->bpo_num_subobjs--; ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0); } } if (dbuf) { dmu_buf_rele(dbuf, FTAG); dbuf = NULL; } if (free) { VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, (i + 1) * sizeof (uint64_t), -1ULL, tx)); } out: /* If there are no entries, there should be no bytes. */ ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 || (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) || bpo->bpo_phys->bpo_bytes == 0); mutex_exit(&bpo->bpo_lock); return (err); } /* * Iterate and remove the entries. If func returns nonzero, iteration * will stop and that entry will not be removed. */ int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) { return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE)); } /* * Iterate the entries. If func returns nonzero, iteration will stop. */ int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) { return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE)); } void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) { bpobj_t subbpo; uint64_t used, comp, uncomp, subsubobjs; ASSERT(bpo->bpo_havesubobj); ASSERT(bpo->bpo_havecomp); VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); if (used == 0) { /* No point in having an empty subobj. */ bpobj_close(&subbpo); bpobj_free(bpo->bpo_os, subobj, tx); return; } dmu_buf_will_dirty(bpo->bpo_dbuf, tx); if (bpo->bpo_phys->bpo_subobjs == 0) { bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os, DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx); } mutex_enter(&bpo->bpo_lock); dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), sizeof (subobj), &subobj, tx); bpo->bpo_phys->bpo_num_subobjs++; /* * If subobj has only one block of subobjs, then move subobj's * subobjs to bpo's subobj list directly. This reduces * recursion in bpobj_iterate due to nested subobjs. */ subsubobjs = subbpo.bpo_phys->bpo_subobjs; if (subsubobjs != 0) { dmu_object_info_t doi; VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi)); if (doi.doi_max_offset == doi.doi_data_block_size) { dmu_buf_t *subdb; uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs; VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs, 0, FTAG, &subdb, 0)); dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), numsubsub * sizeof (subobj), subdb->db_data, tx); dmu_buf_rele(subdb, FTAG); bpo->bpo_phys->bpo_num_subobjs += numsubsub; dmu_buf_will_dirty(subbpo.bpo_dbuf, tx); subbpo.bpo_phys->bpo_subobjs = 0; VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os, subsubobjs, tx)); } } bpo->bpo_phys->bpo_bytes += used; bpo->bpo_phys->bpo_comp += comp; bpo->bpo_phys->bpo_uncomp += uncomp; mutex_exit(&bpo->bpo_lock); bpobj_close(&subbpo); } void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx) { blkptr_t stored_bp = *bp; uint64_t offset; int blkoff; blkptr_t *bparray; ASSERT(!BP_IS_HOLE(bp)); /* We never need the fill count. */ stored_bp.blk_fill = 0; /* The bpobj will compress better if we can leave off the checksum */ if (!BP_GET_DEDUP(bp)) bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum)); mutex_enter(&bpo->bpo_lock); offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp); blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb); if (bpo->bpo_cached_dbuf == NULL || offset < bpo->bpo_cached_dbuf->db_offset || offset >= bpo->bpo_cached_dbuf->db_offset + bpo->bpo_cached_dbuf->db_size) { if (bpo->bpo_cached_dbuf) dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset, bpo, &bpo->bpo_cached_dbuf, 0)); } dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx); bparray = bpo->bpo_cached_dbuf->db_data; bparray[blkoff] = stored_bp; dmu_buf_will_dirty(bpo->bpo_dbuf, tx); bpo->bpo_phys->bpo_num_blkptrs++; bpo->bpo_phys->bpo_bytes += bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); if (bpo->bpo_havecomp) { bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp); bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp); } mutex_exit(&bpo->bpo_lock); } struct space_range_arg { spa_t *spa; uint64_t mintxg; uint64_t maxtxg; uint64_t used; uint64_t comp; uint64_t uncomp; }; /* ARGSUSED */ static int space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { struct space_range_arg *sra = arg; if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { - sra->used += bp_get_dsize_sync(sra->spa, bp); + if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) + sra->used += bp_get_dsize_sync(sra->spa, bp); + else + sra->used += bp_get_dsize(sra->spa, bp); sra->comp += BP_GET_PSIZE(bp); sra->uncomp += BP_GET_UCSIZE(bp); } return (0); } int bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { mutex_enter(&bpo->bpo_lock); *usedp = bpo->bpo_phys->bpo_bytes; if (bpo->bpo_havecomp) { *compp = bpo->bpo_phys->bpo_comp; *uncompp = bpo->bpo_phys->bpo_uncomp; mutex_exit(&bpo->bpo_lock); return (0); } else { mutex_exit(&bpo->bpo_lock); return (bpobj_space_range(bpo, 0, UINT64_MAX, usedp, compp, uncompp)); } } /* * Return the amount of space in the bpobj which is: * mintxg < blk_birth <= maxtxg */ int bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { struct space_range_arg sra = { 0 }; int err; /* * As an optimization, if they want the whole txg range, just * get bpo_bytes rather than iterating over the bps. */ if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp) return (bpobj_space(bpo, usedp, compp, uncompp)); sra.spa = dmu_objset_spa(bpo->bpo_os); sra.mintxg = mintxg; sra.maxtxg = maxtxg; err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL); *usedp = sra.used; *compp = sra.comp; *uncompp = sra.uncomp; return (err); } diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index fad770e275c0..2f06132116e8 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -1,1584 +1,1679 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - */ -/* + * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ +int zfs_send_corrupt_data = B_FALSE; + static char *dmu_recv_tag = "dmu_recv_tag"; /* * The list of data whose inclusion in a send stream can be pending from * one call to backup_cb to another. Multiple calls to dump_free() and * dump_freeobjects() can be aggregated into a single DRR_FREE or * DRR_FREEOBJECTS replay record. */ typedef enum { PENDING_NONE, PENDING_FREE, PENDING_FREEOBJECTS } pendop_t; struct backuparg { dmu_replay_record_t *drr; vnode_t *vp; offset_t *off; objset_t *os; zio_cksum_t zc; uint64_t toguid; int err; pendop_t pending_op; }; static int dump_bytes(struct backuparg *ba, void *buf, int len) { ssize_t resid; /* have to get resid to get detailed errno */ ASSERT3U(len % 8, ==, 0); fletcher_4_incremental_native(buf, len, &ba->zc); ba->err = vn_rdwr(UIO_WRITE, ba->vp, (caddr_t)buf, len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); *ba->off += len; return (ba->err); } static int dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, uint64_t length) { struct drr_free *drrf = &(ba->drr->drr_u.drr_free); /* * If there is a pending op, but it's not PENDING_FREE, push it out, * since free block aggregation can only be done for blocks of the * same type (i.e., DRR_FREE records can only be aggregated with * other DRR_FREE records. DRR_FREEOBJECTS records can only be * aggregated with other DRR_FREEOBJECTS records. */ if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) { if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); ba->pending_op = PENDING_NONE; } if (ba->pending_op == PENDING_FREE) { /* * There should never be a PENDING_FREE if length is -1 * (because dump_dnode is the only place where this * function is called with a -1, and only after flushing * any pending record). */ ASSERT(length != -1ULL); /* * Check to see whether this free block can be aggregated * with pending one. */ if (drrf->drr_object == object && drrf->drr_offset + drrf->drr_length == offset) { drrf->drr_length += length; return (0); } else { /* not a continuation. Push out pending record */ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); ba->pending_op = PENDING_NONE; } } /* create a FREE record and make it pending */ bzero(ba->drr, sizeof (dmu_replay_record_t)); ba->drr->drr_type = DRR_FREE; drrf->drr_object = object; drrf->drr_offset = offset; drrf->drr_length = length; drrf->drr_toguid = ba->toguid; if (length == -1ULL) { if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); } else { ba->pending_op = PENDING_FREE; } return (0); } static int dump_data(struct backuparg *ba, dmu_object_type_t type, uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) { struct drr_write *drrw = &(ba->drr->drr_u.drr_write); /* * If there is any kind of pending aggregation (currently either * a grouping of free objects or free blocks), push it out to * the stream, since aggregation can't be done across operations * of different types. */ if (ba->pending_op != PENDING_NONE) { if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); ba->pending_op = PENDING_NONE; } /* write a DATA record */ bzero(ba->drr, sizeof (dmu_replay_record_t)); ba->drr->drr_type = DRR_WRITE; drrw->drr_object = object; drrw->drr_type = type; drrw->drr_offset = offset; drrw->drr_length = blksz; drrw->drr_toguid = ba->toguid; drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); drrw->drr_key.ddk_cksum = bp->blk_cksum; if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); if (dump_bytes(ba, data, blksz) != 0) return (EINTR); return (0); } static int dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data) { struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill); if (ba->pending_op != PENDING_NONE) { if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); ba->pending_op = PENDING_NONE; } /* write a SPILL record */ bzero(ba->drr, sizeof (dmu_replay_record_t)); ba->drr->drr_type = DRR_SPILL; drrs->drr_object = object; drrs->drr_length = blksz; drrs->drr_toguid = ba->toguid; if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) return (EINTR); if (dump_bytes(ba, data, blksz)) return (EINTR); return (0); } static int dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) { struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects); /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, * push it out, since free block aggregation can only be done for * blocks of the same type (i.e., DRR_FREE records can only be * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records * can only be aggregated with other DRR_FREEOBJECTS records. */ if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREEOBJECTS) { if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); ba->pending_op = PENDING_NONE; } if (ba->pending_op == PENDING_FREEOBJECTS) { /* * See whether this free object array can be aggregated * with pending one */ if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { drrfo->drr_numobjs += numobjs; return (0); } else { /* can't be aggregated. Push out pending record */ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); ba->pending_op = PENDING_NONE; } } /* write a FREEOBJECTS record */ bzero(ba->drr, sizeof (dmu_replay_record_t)); ba->drr->drr_type = DRR_FREEOBJECTS; drrfo->drr_firstobj = firstobj; drrfo->drr_numobjs = numobjs; drrfo->drr_toguid = ba->toguid; ba->pending_op = PENDING_FREEOBJECTS; return (0); } static int dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) { struct drr_object *drro = &(ba->drr->drr_u.drr_object); if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) return (dump_freeobjects(ba, object, 1)); if (ba->pending_op != PENDING_NONE) { if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); ba->pending_op = PENDING_NONE; } /* write an OBJECT record */ bzero(ba->drr, sizeof (dmu_replay_record_t)); ba->drr->drr_type = DRR_OBJECT; drro->drr_object = object; drro->drr_type = dnp->dn_type; drro->drr_bonustype = dnp->dn_bonustype; drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; drro->drr_bonuslen = dnp->dn_bonuslen; drro->drr_checksumtype = dnp->dn_checksum; drro->drr_compress = dnp->dn_compress; drro->drr_toguid = ba->toguid; if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) return (EINTR); /* free anything past the end of the file */ if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) return (EINTR); if (ba->err) return (EINTR); return (0); } #define BP_SPAN(dnp, level) \ (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) /* ARGSUSED */ static int backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { struct backuparg *ba = arg; dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; int err = 0; if (issig(JUSTLOOKING) && issig(FORREAL)) return (EINTR); if (zb->zb_object != DMU_META_DNODE_OBJECT && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { return (0); } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { uint64_t span = BP_SPAN(dnp, zb->zb_level); uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); } else if (bp == NULL) { uint64_t span = BP_SPAN(dnp, zb->zb_level); err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { dnode_phys_t *blk; int i; int blksz = BP_GET_LSIZE(bp); uint32_t aflags = ARC_WAIT; arc_buf_t *abuf; if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) return (EIO); blk = abuf->b_data; for (i = 0; i < blksz >> DNODE_SHIFT; i++) { uint64_t dnobj = (zb->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; err = dump_dnode(ba, dnobj, blk+i); if (err) break; } (void) arc_buf_remove_ref(abuf, &abuf); } else if (type == DMU_OT_SA) { uint32_t aflags = ARC_WAIT; arc_buf_t *abuf; int blksz = BP_GET_LSIZE(bp); if (arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) return (EIO); err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } else { /* it's a level-0 block of a regular object */ uint32_t aflags = ARC_WAIT; arc_buf_t *abuf; int blksz = BP_GET_LSIZE(bp); if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL, &aflags, zb) != 0) - return (EIO); + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) { + if (zfs_send_corrupt_data) { + uint64_t *ptr; + /* Send a block filled with 0x"zfs badd bloc" */ + abuf = arc_buf_alloc(spa, blksz, &abuf, + ARC_BUFC_DATA); + for (ptr = abuf->b_data; + (char *)ptr < (char *)abuf->b_data + blksz; + ptr++) + *ptr = 0x2f5baddb10c; + } else { + return (EIO); + } + } err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, blksz, bp, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } ASSERT(err == 0 || err == EINTR); return (err); } int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, vnode_t *vp, offset_t *off) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; dmu_replay_record_t *drr; struct backuparg ba; int err; uint64_t fromtxg = 0; /* tosnap must be a snapshot */ if (ds->ds_phys->ds_next_snap_obj == 0) return (EINVAL); /* fromsnap must be an earlier snapshot from the same fs as tosnap */ if (fromds && (ds->ds_dir != fromds->ds_dir || fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) return (EXDEV); if (fromorigin) { dsl_pool_t *dp = ds->ds_dir->dd_pool; if (fromsnap) return (EINVAL); if (dsl_dir_is_clone(ds->ds_dir)) { rw_enter(&dp->dp_config_rwlock, RW_READER); err = dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); rw_exit(&dp->dp_config_rwlock); if (err) return (err); } else { fromorigin = B_FALSE; } } drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, DMU_SUBSTREAM); #ifdef _KERNEL if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { uint64_t version; if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) return (EINVAL); if (version == ZPL_VERSION_SA) { DMU_SET_FEATUREFLAGS( drr->drr_u.drr_begin.drr_versioninfo, DMU_BACKUP_FEATURE_SA_SPILL); } } #endif drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; if (fromorigin) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; if (fromds) drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); if (fromds) fromtxg = fromds->ds_phys->ds_creation_txg; if (fromorigin) dsl_dataset_rele(fromds, FTAG); ba.drr = drr; ba.vp = vp; ba.os = tosnap; ba.off = off; ba.toguid = ds->ds_phys->ds_guid; ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); ba.pending_op = PENDING_NONE; if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { kmem_free(drr, sizeof (dmu_replay_record_t)); return (ba.err); } err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, backup_cb, &ba); if (ba.pending_op != PENDING_NONE) if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) err = EINTR; if (err) { if (err == EINTR && ba.err) err = ba.err; kmem_free(drr, sizeof (dmu_replay_record_t)); return (err); } bzero(drr, sizeof (dmu_replay_record_t)); drr->drr_type = DRR_END; drr->drr_u.drr_end.drr_checksum = ba.zc; drr->drr_u.drr_end.drr_toguid = ba.toguid; if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { kmem_free(drr, sizeof (dmu_replay_record_t)); return (ba.err); } kmem_free(drr, sizeof (dmu_replay_record_t)); return (0); } +int +dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + uint64_t *sizep) +{ + dsl_dataset_t *ds = tosnap->os_dsl_dataset; + dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + int err; + uint64_t size, recordsize; + + /* tosnap must be a snapshot */ + if (ds->ds_phys->ds_next_snap_obj == 0) + return (EINVAL); + + /* fromsnap must be an earlier snapshot from the same fs as tosnap */ + if (fromds && (ds->ds_dir != fromds->ds_dir || + fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + return (EXDEV); + + if (fromorigin) { + if (fromsnap) + return (EINVAL); + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + } else { + fromorigin = B_FALSE; + } + } + + /* Get uncompressed size estimate of changed data. */ + if (fromds == NULL) { + size = ds->ds_phys->ds_uncompressed_bytes; + } else { + uint64_t used, comp; + err = dsl_dataset_space_written(fromds, ds, + &used, &comp, &size); + if (fromorigin) + dsl_dataset_rele(fromds, FTAG); + if (err) + return (err); + } + + /* + * Assume that space (both on-disk and in-stream) is dominated by + * data. We will adjust for indirect blocks and the copies property, + * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). + */ + + /* + * Subtract out approximate space used by indirect blocks. + * Assume most space is used by data blocks (non-indirect, non-dnode). + * Assume all blocks are recordsize. Assume ditto blocks and + * internal fragmentation counter out compression. + * + * Therefore, space used by indirect blocks is sizeof(blkptr_t) per + * block, which we observe in practice. + */ + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_prop_get_ds(ds, "recordsize", + sizeof (recordsize), 1, &recordsize, NULL); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + size -= size / recordsize * sizeof (blkptr_t); + + /* Add in the space for the record associated with each block. */ + size += size / recordsize * sizeof (dmu_replay_record_t); + + *sizep = size; + + return (0); +} + struct recvbeginsyncarg { const char *tofs; const char *tosnap; dsl_dataset_t *origin; uint64_t fromguid; dmu_objset_type_t type; void *tag; boolean_t force; uint64_t dsflags; char clonelastname[MAXNAMELEN]; dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ cred_t *cr; }; /* ARGSUSED */ static int recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; struct recvbeginsyncarg *rbsa = arg2; objset_t *mos = dd->dd_pool->dp_meta_objset; uint64_t val; int err; err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); if (err != ENOENT) return (err ? err : EEXIST); if (rbsa->origin) { /* make sure it's a snap in the same pool */ if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) return (EXDEV); if (!dsl_dataset_is_snapshot(rbsa->origin)) return (EINVAL); if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) return (ENODEV); } return (0); } static void recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; struct recvbeginsyncarg *rbsa = arg2; uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; uint64_t dsobj; /* Create and open new dataset. */ dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, rbsa->origin, flags, rbsa->cr, tx); VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, B_TRUE, dmu_recv_tag, &rbsa->ds)); if (rbsa->origin == NULL) { (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); } spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); } /* ARGSUSED */ static int recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct recvbeginsyncarg *rbsa = arg2; int err; uint64_t val; /* must not have any changes since most recent snapshot */ if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) return (ETXTBSY); /* new snapshot name must not exist */ err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); if (err == 0) return (EEXIST); if (err != ENOENT) return (err); if (rbsa->fromguid) { /* if incremental, most recent snapshot must match fromguid */ if (ds->ds_prev == NULL) return (ENODEV); /* * most recent snapshot must match fromguid, or there are no * changes since the fromguid one */ if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) { uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; while (obj != 0) { dsl_dataset_t *snap; err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool, obj, FTAG, &snap); if (err) return (ENODEV); if (snap->ds_phys->ds_creation_txg < birth) { dsl_dataset_rele(snap, FTAG); return (ENODEV); } if (snap->ds_phys->ds_guid == rbsa->fromguid) { dsl_dataset_rele(snap, FTAG); break; /* it's ok */ } obj = snap->ds_phys->ds_prev_snap_obj; dsl_dataset_rele(snap, FTAG); } if (obj == 0) return (ENODEV); } } else { /* if full, most recent snapshot must be $ORIGIN */ if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) return (ENODEV); } /* temporary clone name must not exist */ err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_dir->dd_phys->dd_child_dir_zapobj, rbsa->clonelastname, 8, 1, &val); if (err == 0) return (EEXIST); if (err != ENOENT) return (err); return (0); } /* ARGSUSED */ static void recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ohds = arg1; struct recvbeginsyncarg *rbsa = arg2; dsl_pool_t *dp = ohds->ds_dir->dd_pool; dsl_dataset_t *cds; uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; uint64_t dsobj; /* create and open the temporary clone */ dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, ohds->ds_prev, flags, rbsa->cr, tx); VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); /* * If we actually created a non-clone, we need to create the * objset in our new dataset. */ if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { (void) dmu_objset_create_impl(dp->dp_spa, cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); } rbsa->ds = cds; spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, dp->dp_spa, tx, "dataset = %lld", dsobj); } static boolean_t dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) { int featureflags; featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); /* Verify pool version supports SA if SA_SPILL feature set */ return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA)); } /* * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() * succeeds; otherwise we will leak the holds on the datasets. */ int dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc) { int err = 0; boolean_t byteswap; struct recvbeginsyncarg rbsa = { 0 }; uint64_t versioninfo; int flags; dsl_dataset_t *ds; if (drrb->drr_magic == DMU_BACKUP_MAGIC) byteswap = FALSE; else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) byteswap = TRUE; else return (EINVAL); rbsa.tofs = tofs; rbsa.tosnap = tosnap; rbsa.origin = origin ? origin->os_dsl_dataset : NULL; rbsa.fromguid = drrb->drr_fromguid; rbsa.type = drrb->drr_type; rbsa.tag = FTAG; rbsa.dsflags = 0; rbsa.cr = CRED(); versioninfo = drrb->drr_versioninfo; flags = drrb->drr_flags; if (byteswap) { rbsa.type = BSWAP_32(rbsa.type); rbsa.fromguid = BSWAP_64(rbsa.fromguid); versioninfo = BSWAP_64(versioninfo); flags = BSWAP_32(flags); } if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM || rbsa.type >= DMU_OST_NUMTYPES || ((flags & DRR_FLAG_CLONE) && origin == NULL)) return (EINVAL); if (flags & DRR_FLAG_CI_DATA) rbsa.dsflags = DS_FLAG_CI_DATASET; bzero(drc, sizeof (dmu_recv_cookie_t)); drc->drc_drrb = drrb; drc->drc_tosnap = tosnap; drc->drc_top_ds = top_ds; drc->drc_force = force; /* * Process the begin in syncing context. */ /* open the dataset we are logically receiving into */ err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); if (err == 0) { if (dmu_recv_verify_features(ds, drrb)) { dsl_dataset_rele(ds, dmu_recv_tag); return (ENOTSUP); } /* target fs already exists; recv into temp clone */ /* Can't recv a clone into an existing fs */ if (flags & DRR_FLAG_CLONE) { dsl_dataset_rele(ds, dmu_recv_tag); return (EINVAL); } /* must not have an incremental recv already in progress */ if (!mutex_tryenter(&ds->ds_recvlock)) { dsl_dataset_rele(ds, dmu_recv_tag); return (EBUSY); } /* tmp clone name is: tofs/%tosnap" */ (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), "%%%s", tosnap); rbsa.force = force; err = dsl_sync_task_do(ds->ds_dir->dd_pool, recv_existing_check, recv_existing_sync, ds, &rbsa, 5); if (err) { mutex_exit(&ds->ds_recvlock); dsl_dataset_rele(ds, dmu_recv_tag); return (err); } drc->drc_logical_ds = ds; drc->drc_real_ds = rbsa.ds; } else if (err == ENOENT) { /* target fs does not exist; must be a full backup or clone */ char *cp; /* * If it's a non-clone incremental, we are missing the * target fs, so fail the recv. */ if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) return (ENOENT); /* Open the parent of tofs */ cp = strrchr(tofs, '/'); *cp = '\0'; err = dsl_dataset_hold(tofs, FTAG, &ds); *cp = '/'; if (err) return (err); if (dmu_recv_verify_features(ds, drrb)) { dsl_dataset_rele(ds, FTAG); return (ENOTSUP); } err = dsl_sync_task_do(ds->ds_dir->dd_pool, recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); dsl_dataset_rele(ds, FTAG); if (err) return (err); drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; drc->drc_newfs = B_TRUE; } return (err); } struct restorearg { int err; int byteswap; vnode_t *vp; char *buf; uint64_t voff; int bufsize; /* amount of memory allocated for buf */ zio_cksum_t cksum; avl_tree_t *guid_to_ds_map; }; typedef struct guid_map_entry { uint64_t guid; dsl_dataset_t *gme_ds; avl_node_t avlnode; } guid_map_entry_t; static int guid_compare(const void *arg1, const void *arg2) { const guid_map_entry_t *gmep1 = arg1; const guid_map_entry_t *gmep2 = arg2; if (gmep1->guid < gmep2->guid) return (-1); else if (gmep1->guid > gmep2->guid) return (1); return (0); } static void free_guid_map_onexit(void *arg) { avl_tree_t *ca = arg; void *cookie = NULL; guid_map_entry_t *gmep; while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { dsl_dataset_rele(gmep->gme_ds, ca); kmem_free(gmep, sizeof (guid_map_entry_t)); } avl_destroy(ca); kmem_free(ca, sizeof (avl_tree_t)); } static void * restore_read(struct restorearg *ra, int len) { void *rv; int done = 0; /* some things will require 8-byte alignment, so everything must */ ASSERT3U(len % 8, ==, 0); while (done < len) { ssize_t resid; ra->err = vn_rdwr(UIO_READ, ra->vp, (caddr_t)ra->buf + done, len - done, ra->voff, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); if (resid == len - done) ra->err = EINVAL; ra->voff += len - done - resid; done = len - resid; if (ra->err) return (NULL); } ASSERT3U(done, ==, len); rv = ra->buf; if (ra->byteswap) fletcher_4_incremental_byteswap(rv, len, &ra->cksum); else fletcher_4_incremental_native(rv, len, &ra->cksum); return (rv); } noinline static void backup_byteswap(dmu_replay_record_t *drr) { #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) drr->drr_type = BSWAP_32(drr->drr_type); drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); switch (drr->drr_type) { case DRR_BEGIN: DO64(drr_begin.drr_magic); DO64(drr_begin.drr_versioninfo); DO64(drr_begin.drr_creation_time); DO32(drr_begin.drr_type); DO32(drr_begin.drr_flags); DO64(drr_begin.drr_toguid); DO64(drr_begin.drr_fromguid); break; case DRR_OBJECT: DO64(drr_object.drr_object); /* DO64(drr_object.drr_allocation_txg); */ DO32(drr_object.drr_type); DO32(drr_object.drr_bonustype); DO32(drr_object.drr_blksz); DO32(drr_object.drr_bonuslen); DO64(drr_object.drr_toguid); break; case DRR_FREEOBJECTS: DO64(drr_freeobjects.drr_firstobj); DO64(drr_freeobjects.drr_numobjs); DO64(drr_freeobjects.drr_toguid); break; case DRR_WRITE: DO64(drr_write.drr_object); DO32(drr_write.drr_type); DO64(drr_write.drr_offset); DO64(drr_write.drr_length); DO64(drr_write.drr_toguid); DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); DO64(drr_write.drr_key.ddk_prop); break; case DRR_WRITE_BYREF: DO64(drr_write_byref.drr_object); DO64(drr_write_byref.drr_offset); DO64(drr_write_byref.drr_length); DO64(drr_write_byref.drr_toguid); DO64(drr_write_byref.drr_refguid); DO64(drr_write_byref.drr_refobject); DO64(drr_write_byref.drr_refoffset); DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); DO64(drr_write_byref.drr_key.ddk_prop); break; case DRR_FREE: DO64(drr_free.drr_object); DO64(drr_free.drr_offset); DO64(drr_free.drr_length); DO64(drr_free.drr_toguid); break; case DRR_SPILL: DO64(drr_spill.drr_object); DO64(drr_spill.drr_length); DO64(drr_spill.drr_toguid); break; case DRR_END: DO64(drr_end.drr_checksum.zc_word[0]); DO64(drr_end.drr_checksum.zc_word[1]); DO64(drr_end.drr_checksum.zc_word[2]); DO64(drr_end.drr_checksum.zc_word[3]); DO64(drr_end.drr_toguid); break; default: break; } #undef DO64 #undef DO32 } noinline static int restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) { int err; dmu_tx_t *tx; void *data = NULL; if (drro->drr_type == DMU_OT_NONE || drro->drr_type >= DMU_OT_NUMTYPES || drro->drr_bonustype >= DMU_OT_NUMTYPES || drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || drro->drr_blksz < SPA_MINBLOCKSIZE || drro->drr_blksz > SPA_MAXBLOCKSIZE || drro->drr_bonuslen > DN_MAX_BONUSLEN) { return (EINVAL); } err = dmu_object_info(os, drro->drr_object, NULL); if (err != 0 && err != ENOENT) return (EINVAL); if (drro->drr_bonuslen) { data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); if (ra->err) return (ra->err); } if (err == ENOENT) { /* currently free, want to be allocated */ tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } err = dmu_object_claim(os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, tx); dmu_tx_commit(tx); } else { /* currently allocated, want to be allocated */ err = dmu_object_reclaim(os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen); } if (err) { return (EINVAL); } tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, drro->drr_object); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, tx); dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); if (data != NULL) { dmu_buf_t *db; VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, drro->drr_bonuslen); bcopy(data, db->db_data, drro->drr_bonuslen); if (ra->byteswap) { dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, drro->drr_bonuslen); } dmu_buf_rele(db, FTAG); } dmu_tx_commit(tx); return (0); } /* ARGSUSED */ noinline static int restore_freeobjects(struct restorearg *ra, objset_t *os, struct drr_freeobjects *drrfo) { uint64_t obj; if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) return (EINVAL); for (obj = drrfo->drr_firstobj; obj < drrfo->drr_firstobj + drrfo->drr_numobjs; (void) dmu_object_next(os, &obj, FALSE, 0)) { int err; if (dmu_object_info(os, obj, NULL) != 0) continue; err = dmu_free_object(os, obj); if (err) return (err); } return (0); } noinline static int restore_write(struct restorearg *ra, objset_t *os, struct drr_write *drrw) { dmu_tx_t *tx; void *data; int err; if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || drrw->drr_type >= DMU_OT_NUMTYPES) return (EINVAL); data = restore_read(ra, drrw->drr_length); if (data == NULL) return (ra->err); if (dmu_object_info(os, drrw->drr_object, NULL) != 0) return (EINVAL); tx = dmu_tx_create(os); dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } if (ra->byteswap) dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); dmu_write(os, drrw->drr_object, drrw->drr_offset, drrw->drr_length, data, tx); dmu_tx_commit(tx); return (0); } /* * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed * streams to refer to a copy of the data that is already on the * system because it came in earlier in the stream. This function * finds the earlier copy of the data, and uses that copy instead of * data from the stream to fulfill this write. */ static int restore_write_byref(struct restorearg *ra, objset_t *os, struct drr_write_byref *drrwbr) { dmu_tx_t *tx; int err; guid_map_entry_t gmesrch; guid_map_entry_t *gmep; avl_index_t where; objset_t *ref_os = NULL; dmu_buf_t *dbp; if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) return (EINVAL); /* * If the GUID of the referenced dataset is different from the * GUID of the target dataset, find the referenced dataset. */ if (drrwbr->drr_toguid != drrwbr->drr_refguid) { gmesrch.guid = drrwbr->drr_refguid; if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, &where)) == NULL) { return (EINVAL); } if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) return (EINVAL); } else { ref_os = os; } err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); if (err) return (err); tx = dmu_tx_create(os); dmu_tx_hold_write(tx, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } dmu_write(os, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); dmu_buf_rele(dbp, FTAG); dmu_tx_commit(tx); return (0); } static int restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) { dmu_tx_t *tx; void *data; dmu_buf_t *db, *db_spill; int err; if (drrs->drr_length < SPA_MINBLOCKSIZE || drrs->drr_length > SPA_MAXBLOCKSIZE) return (EINVAL); data = restore_read(ra, drrs->drr_length); if (data == NULL) return (ra->err); if (dmu_object_info(os, drrs->drr_object, NULL) != 0) return (EINVAL); VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { dmu_buf_rele(db, FTAG); return (err); } tx = dmu_tx_create(os); dmu_tx_hold_spill(tx, db->db_object); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_buf_rele(db, FTAG); dmu_buf_rele(db_spill, FTAG); dmu_tx_abort(tx); return (err); } dmu_buf_will_dirty(db_spill, tx); if (db_spill->db_size < drrs->drr_length) VERIFY(0 == dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); bcopy(data, db_spill->db_data, drrs->drr_length); dmu_buf_rele(db, FTAG); dmu_buf_rele(db_spill, FTAG); dmu_tx_commit(tx); return (0); } /* ARGSUSED */ noinline static int restore_free(struct restorearg *ra, objset_t *os, struct drr_free *drrf) { int err; if (drrf->drr_length != -1ULL && drrf->drr_offset + drrf->drr_length < drrf->drr_offset) return (EINVAL); if (dmu_object_info(os, drrf->drr_object, NULL) != 0) return (EINVAL); err = dmu_free_long_range(os, drrf->drr_object, drrf->drr_offset, drrf->drr_length); return (err); } /* * NB: callers *must* call dmu_recv_end() if this succeeds. */ int dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, int cleanup_fd, uint64_t *action_handlep) { struct restorearg ra = { 0 }; dmu_replay_record_t *drr; objset_t *os; zio_cksum_t pcksum; int featureflags; if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) ra.byteswap = TRUE; { /* compute checksum of drr_begin record */ dmu_replay_record_t *drr; drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin = *drc->drc_drrb; if (ra.byteswap) { fletcher_4_incremental_byteswap(drr, sizeof (dmu_replay_record_t), &ra.cksum); } else { fletcher_4_incremental_native(drr, sizeof (dmu_replay_record_t), &ra.cksum); } kmem_free(drr, sizeof (dmu_replay_record_t)); } if (ra.byteswap) { struct drr_begin *drrb = drc->drc_drrb; drrb->drr_magic = BSWAP_64(drrb->drr_magic); drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); drrb->drr_type = BSWAP_32(drrb->drr_type); drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } ra.vp = vp; ra.voff = *voffp; ra.bufsize = 1<<20; ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP); /* these were verified in dmu_recv_begin */ ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == DMU_SUBSTREAM); ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); /* * Open the objset we are modifying. */ VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); /* if this stream is dedup'ed, set up the avl tree for guid mapping */ if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { minor_t minor; if (cleanup_fd == -1) { ra.err = EBADF; goto out; } ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); if (ra.err) { cleanup_fd = -1; goto out; } if (*action_handlep == 0) { ra.guid_to_ds_map = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); avl_create(ra.guid_to_ds_map, guid_compare, sizeof (guid_map_entry_t), offsetof(guid_map_entry_t, avlnode)); ra.err = zfs_onexit_add_cb(minor, free_guid_map_onexit, ra.guid_to_ds_map, action_handlep); if (ra.err) goto out; } else { ra.err = zfs_onexit_cb_data(minor, *action_handlep, (void **)&ra.guid_to_ds_map); if (ra.err) goto out; } drc->drc_guid_to_ds_map = ra.guid_to_ds_map; } /* * Read records and process them. */ pcksum = ra.cksum; while (ra.err == 0 && NULL != (drr = restore_read(&ra, sizeof (*drr)))) { if (issig(JUSTLOOKING) && issig(FORREAL)) { ra.err = EINTR; goto out; } if (ra.byteswap) backup_byteswap(drr); switch (drr->drr_type) { case DRR_OBJECT: { /* * We need to make a copy of the record header, * because restore_{object,write} may need to * restore_read(), which will invalidate drr. */ struct drr_object drro = drr->drr_u.drr_object; ra.err = restore_object(&ra, os, &drro); break; } case DRR_FREEOBJECTS: { struct drr_freeobjects drrfo = drr->drr_u.drr_freeobjects; ra.err = restore_freeobjects(&ra, os, &drrfo); break; } case DRR_WRITE: { struct drr_write drrw = drr->drr_u.drr_write; ra.err = restore_write(&ra, os, &drrw); break; } case DRR_WRITE_BYREF: { struct drr_write_byref drrwbr = drr->drr_u.drr_write_byref; ra.err = restore_write_byref(&ra, os, &drrwbr); break; } case DRR_FREE: { struct drr_free drrf = drr->drr_u.drr_free; ra.err = restore_free(&ra, os, &drrf); break; } case DRR_END: { struct drr_end drre = drr->drr_u.drr_end; /* * We compare against the *previous* checksum * value, because the stored checksum is of * everything before the DRR_END record. */ if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) ra.err = ECKSUM; goto out; } case DRR_SPILL: { struct drr_spill drrs = drr->drr_u.drr_spill; ra.err = restore_spill(&ra, os, &drrs); break; } default: ra.err = EINVAL; goto out; } pcksum = ra.cksum; } ASSERT(ra.err != 0); out: if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) zfs_onexit_fd_rele(cleanup_fd); if (ra.err != 0) { /* * destroy what we created, so we don't leave it in the * inconsistent restoring state. */ txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); if (drc->drc_real_ds != drc->drc_logical_ds) { mutex_exit(&drc->drc_logical_ds->ds_recvlock); dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); } } vmem_free(ra.buf, ra.bufsize); *voffp = ra.voff; return (ra.err); } struct recvendsyncarg { char *tosnap; uint64_t creation_time; uint64_t toguid; }; static int recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct recvendsyncarg *resa = arg2; return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); } static void recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct recvendsyncarg *resa = arg2; dsl_dataset_snapshot_sync(ds, resa->tosnap, tx); /* set snapshot's creation time and guid */ dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; ds->ds_prev->ds_phys->ds_guid = resa->toguid; ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; } static int add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds) { dsl_pool_t *dp = ds->ds_dir->dd_pool; uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj; dsl_dataset_t *snapds; guid_map_entry_t *gmep; int err; ASSERT(guid_map != NULL); rw_enter(&dp->dp_config_rwlock, RW_READER); err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds); if (err == 0) { gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); gmep->guid = snapds->ds_phys->ds_guid; gmep->gme_ds = snapds; avl_add(guid_map, gmep); } rw_exit(&dp->dp_config_rwlock); return (err); } static int dmu_recv_existing_end(dmu_recv_cookie_t *drc) { struct recvendsyncarg resa; dsl_dataset_t *ds = drc->drc_logical_ds; - int err; + int err, myerr; /* * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() * expects it to have a ds_user_ptr (and zil), but clone_swap() * can close it. */ txg_wait_synced(ds->ds_dir->dd_pool, 0); if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, drc->drc_force); if (err) goto out; } else { mutex_exit(&ds->ds_recvlock); dsl_dataset_rele(ds, dmu_recv_tag); (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); return (EBUSY); } resa.creation_time = drc->drc_drrb->drr_creation_time; resa.toguid = drc->drc_drrb->drr_toguid; resa.tosnap = drc->drc_tosnap; err = dsl_sync_task_do(ds->ds_dir->dd_pool, recv_end_check, recv_end_sync, ds, &resa, 3); if (err) { /* swap back */ (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); } out: mutex_exit(&ds->ds_recvlock); if (err == 0 && drc->drc_guid_to_ds_map != NULL) (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); dsl_dataset_disown(ds, dmu_recv_tag); - (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); + myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); + ASSERT3U(myerr, ==, 0); return (err); } static int dmu_recv_new_end(dmu_recv_cookie_t *drc) { struct recvendsyncarg resa; dsl_dataset_t *ds = drc->drc_logical_ds; int err; /* * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() * expects it to have a ds_user_ptr (and zil), but clone_swap() * can close it. */ txg_wait_synced(ds->ds_dir->dd_pool, 0); resa.creation_time = drc->drc_drrb->drr_creation_time; resa.toguid = drc->drc_drrb->drr_toguid; resa.tosnap = drc->drc_tosnap; err = dsl_sync_task_do(ds->ds_dir->dd_pool, recv_end_check, recv_end_sync, ds, &resa, 3); if (err) { /* clean up the fs we just recv'd into */ (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); } else { if (drc->drc_guid_to_ds_map != NULL) (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); /* release the hold from dmu_recv_begin */ dsl_dataset_disown(ds, dmu_recv_tag); } return (err); } int dmu_recv_end(dmu_recv_cookie_t *drc) { if (drc->drc_logical_ds != drc->drc_real_ds) return (dmu_recv_existing_end(drc)); else return (dmu_recv_new_end(drc)); } diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 2deec8cf1854..25c8ac6b1258 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -1,4091 +1,4301 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static char *dsl_reaper = "the grim reaper"; static dsl_checkfunc_t dsl_dataset_destroy_begin_check; static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; static dsl_syncfunc_t dsl_dataset_set_reservation_sync; #define SWITCH64(x, y) \ { \ uint64_t __tmp = (x); \ (x) = (y); \ (y) = __tmp; \ } #define DS_REF_MAX (1ULL << 62) #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE #define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. If there's a refreservation, that space has already been * partially accounted for in our ancestors. */ static int64_t parent_delta(dsl_dataset_t *ds, int64_t delta) { uint64_t old_bytes, new_bytes; if (ds->ds_reserved == 0) return (delta); old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); return (new_bytes - old_bytes); } void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) { int used, compressed, uncompressed; int64_t delta; used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); compressed = BP_GET_PSIZE(bp); uncompressed = BP_GET_UCSIZE(bp); dprintf_bp(bp, "ds=%p", ds); ASSERT(dmu_tx_is_syncing(tx)); /* It could have been compressed away to nothing */ if (BP_IS_HOLE(bp)) return; ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); if (ds == NULL) { /* * Account for the meta-objset space in its placeholder * dsl_dir. */ ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, used, compressed, uncompressed, tx); dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); return; } dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); delta = parent_delta(ds, used); ds->ds_phys->ds_used_bytes += used; ds->ds_phys->ds_compressed_bytes += compressed; ds->ds_phys->ds_uncompressed_bytes += uncompressed; ds->ds_phys->ds_unique_bytes += used; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, compressed, uncompressed, tx); dsl_dir_transfer_space(ds->ds_dir, used - delta, DD_USED_REFRSRV, DD_USED_HEAD, tx); mutex_exit(&ds->ds_dir->dd_lock); } int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, boolean_t async) { int used, compressed, uncompressed; if (BP_IS_HOLE(bp)) return (0); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(bp->blk_birth <= tx->tx_txg); used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); compressed = BP_GET_PSIZE(bp); uncompressed = BP_GET_UCSIZE(bp); ASSERT(used > 0); if (ds == NULL) { /* * Account for the meta-objset space in its placeholder * dataset. */ dsl_free(tx->tx_pool, tx->tx_txg, bp); dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, -used, -compressed, -uncompressed, tx); dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); return (used); } ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); ASSERT(!dsl_dataset_is_snapshot(ds)); dmu_buf_will_dirty(ds->ds_dbuf, tx); if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { int64_t delta; dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); dsl_free(tx->tx_pool, tx->tx_txg, bp); mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); ASSERT(ds->ds_phys->ds_unique_bytes >= used || !DS_UNIQUE_IS_ACCURATE(ds)); delta = parent_delta(ds, -used); ds->ds_phys->ds_unique_bytes -= used; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, -compressed, -uncompressed, tx); dsl_dir_transfer_space(ds->ds_dir, -used - delta, DD_USED_REFRSRV, DD_USED_HEAD, tx); mutex_exit(&ds->ds_dir->dd_lock); } else { dprintf_bp(bp, "putting on dead list: %s", ""); if (async) { /* * We are here as part of zio's write done callback, * which means we're a zio interrupt thread. We can't * call dsl_deadlist_insert() now because it may block * waiting for I/O. Instead, put bp on the deferred * queue and let dsl_pool_sync() finish the job. */ bplist_append(&ds->ds_pending_deadlist, bp); } else { dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); } ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object && bp->blk_birth > ds->ds_prev->ds_phys->ds_prev_snap_txg) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); mutex_enter(&ds->ds_prev->ds_lock); ds->ds_prev->ds_phys->ds_unique_bytes += used; mutex_exit(&ds->ds_prev->ds_lock); } if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { dsl_dir_transfer_space(ds->ds_dir, used, DD_USED_HEAD, DD_USED_SNAP, tx); } } mutex_enter(&ds->ds_lock); ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); ds->ds_phys->ds_used_bytes -= used; ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); ds->ds_phys->ds_compressed_bytes -= compressed; ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); ds->ds_phys->ds_uncompressed_bytes -= uncompressed; mutex_exit(&ds->ds_lock); return (used); } uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) { uint64_t trysnap = 0; if (ds == NULL) return (0); /* * The snapshot creation could fail, but that would cause an * incorrect FALSE return, which would only result in an * overestimation of the amount of space that an operation would * consume, which is OK. * * There's also a small window where we could miss a pending * snapshot, because we could set the sync task in the quiescing * phase. So this should only be used as a guess. */ if (ds->ds_trysnap_txg > spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) trysnap = ds->ds_trysnap_txg; return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); } boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, uint64_t blk_birth) { if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) return (B_FALSE); ddt_prefetch(dsl_dataset_get_spa(ds), bp); return (B_TRUE); } /* ARGSUSED */ static void dsl_dataset_evict(dmu_buf_t *db, void *dsv) { dsl_dataset_t *ds = dsv; ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); unique_remove(ds->ds_fsid_guid); if (ds->ds_objset != NULL) dmu_objset_evict(ds->ds_objset); if (ds->ds_prev) { dsl_dataset_drop_ref(ds->ds_prev, ds); ds->ds_prev = NULL; } bplist_destroy(&ds->ds_pending_deadlist); if (db != NULL) { dsl_deadlist_close(&ds->ds_deadlist); } else { ASSERT(ds->ds_deadlist.dl_dbuf == NULL); ASSERT(!ds->ds_deadlist.dl_oldfmt); } if (ds->ds_dir) dsl_dir_close(ds->ds_dir, ds); ASSERT(!list_link_active(&ds->ds_synced_link)); mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); kmem_free(ds, sizeof (dsl_dataset_t)); } static int dsl_dataset_get_snapname(dsl_dataset_t *ds) { dsl_dataset_phys_t *headphys; int err; dmu_buf_t *headdbuf; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; if (ds->ds_snapname[0]) return (0); if (ds->ds_phys->ds_next_snap_obj == 0) return (0); err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &headdbuf); if (err) return (err); headphys = headdbuf->db_data; err = zap_value_search(dp->dp_meta_objset, headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); dmu_buf_rele(headdbuf, FTAG); return (err); } static int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; matchtype_t mt; int err; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) mt = MT_FIRST; else mt = MT_EXACT; err = zap_lookup_norm(mos, snapobj, name, 8, 1, value, mt, NULL, 0, NULL); if (err == ENOTSUP && mt == MT_FIRST) err = zap_lookup(mos, snapobj, name, 8, 1, value); return (err); } static int dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; matchtype_t mt; int err; dsl_dir_snap_cmtime_update(ds->ds_dir); if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) mt = MT_FIRST; else mt = MT_EXACT; err = zap_remove_norm(mos, snapobj, name, mt, tx); if (err == ENOTSUP && mt == MT_FIRST) err = zap_remove(mos, snapobj, name, tx); return (err); } static int dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_t **dsp) { objset_t *mos = dp->dp_meta_objset; dmu_buf_t *dbuf; dsl_dataset_t *ds; int err; dmu_object_info_t doi; ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || dsl_pool_sync_context(dp)); err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); if (err) return (err); /* Make sure dsobj has the correct object type. */ dmu_object_info_from_db(dbuf, &doi); if (doi.doi_type != DMU_OT_DSL_DATASET) return (EINVAL); ds = dmu_buf_get_user(dbuf); if (ds == NULL) { dsl_dataset_t *winner = NULL; ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); ds->ds_dbuf = dbuf; ds->ds_object = dsobj; ds->ds_phys = dbuf->db_data; list_link_init(&ds->ds_synced_link); mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&ds->ds_rwlock, NULL, RW_DEFAULT, NULL); cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); bplist_create(&ds->ds_pending_deadlist); dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); if (err == 0) { err = dsl_dir_open_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); } if (err) { mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); kmem_free(ds, sizeof (dsl_dataset_t)); dmu_buf_rele(dbuf, tag); return (err); } if (!dsl_dataset_is_snapshot(ds)) { ds->ds_snapname[0] = '\0'; if (ds->ds_phys->ds_prev_snap_obj) { err = dsl_dataset_get_ref(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev); } } else { if (zfs_flags & ZFS_DEBUG_SNAPNAMES) err = dsl_dataset_get_snapname(ds); if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { err = zap_count( ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_phys->ds_userrefs_obj, &ds->ds_userrefs); } } if (err == 0 && !dsl_dataset_is_snapshot(ds)) { /* * In sync context, we're called with either no lock * or with the write lock. If we're not syncing, * we're always called with the read lock held. */ boolean_t need_lock = !RW_WRITE_HELD(&dp->dp_config_rwlock) && dsl_pool_sync_context(dp); if (need_lock) rw_enter(&dp->dp_config_rwlock, RW_READER); err = dsl_prop_get_ds(ds, "refreservation", sizeof (uint64_t), 1, &ds->ds_reserved, NULL); if (err == 0) { err = dsl_prop_get_ds(ds, "refquota", sizeof (uint64_t), 1, &ds->ds_quota, NULL); } if (need_lock) rw_exit(&dp->dp_config_rwlock); } else { ds->ds_reserved = ds->ds_quota = 0; } if (err == 0) { winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, dsl_dataset_evict); } if (err || winner) { bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_prev) dsl_dataset_drop_ref(ds->ds_prev, ds); dsl_dir_close(ds->ds_dir, ds); mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); kmem_free(ds, sizeof (dsl_dataset_t)); if (err) { dmu_buf_rele(dbuf, tag); return (err); } ds = winner; } else { ds->ds_fsid_guid = unique_insert(ds->ds_phys->ds_fsid_guid); } } ASSERT3P(ds->ds_dbuf, ==, dbuf); ASSERT3P(ds->ds_phys, ==, dbuf->db_data); ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); mutex_enter(&ds->ds_lock); if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { mutex_exit(&ds->ds_lock); dmu_buf_rele(ds->ds_dbuf, tag); return (ENOENT); } mutex_exit(&ds->ds_lock); *dsp = ds; return (0); } static int dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) { dsl_pool_t *dp = ds->ds_dir->dd_pool; /* * In syncing context we don't want the rwlock lock: there * may be an existing writer waiting for sync phase to * finish. We don't need to worry about such writers, since * sync phase is single-threaded, so the writer can't be * doing anything while we are active. */ if (dsl_pool_sync_context(dp)) { ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); return (0); } /* * Normal users will hold the ds_rwlock as a READER until they * are finished (i.e., call dsl_dataset_rele()). "Owners" will * drop their READER lock after they set the ds_owner field. * * If the dataset is being destroyed, the destroy thread will * obtain a WRITER lock for exclusive access after it's done its * open-context work and then change the ds_owner to * dsl_reaper once destruction is assured. So threads * may block here temporarily, until the "destructability" of * the dataset is determined. */ ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); mutex_enter(&ds->ds_lock); while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { rw_exit(&dp->dp_config_rwlock); cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); if (DSL_DATASET_IS_DESTROYED(ds)) { mutex_exit(&ds->ds_lock); dsl_dataset_drop_ref(ds, tag); rw_enter(&dp->dp_config_rwlock, RW_READER); return (ENOENT); } /* * The dp_config_rwlock lives above the ds_lock. And * we need to check DSL_DATASET_IS_DESTROYED() while * holding the ds_lock, so we have to drop and reacquire * the ds_lock here. */ mutex_exit(&ds->ds_lock); rw_enter(&dp->dp_config_rwlock, RW_READER); mutex_enter(&ds->ds_lock); } mutex_exit(&ds->ds_lock); return (0); } int dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_t **dsp) { int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); if (err) return (err); return (dsl_dataset_hold_ref(*dsp, tag)); } int dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp) { int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); if (err) return (err); if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { dsl_dataset_rele(*dsp, tag); *dsp = NULL; return (EBUSY); } return (0); } int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) { dsl_dir_t *dd; dsl_pool_t *dp; const char *snapname; uint64_t obj; int err = 0; err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); if (err) return (err); dp = dd->dd_pool; obj = dd->dd_phys->dd_head_dataset_obj; rw_enter(&dp->dp_config_rwlock, RW_READER); if (obj) err = dsl_dataset_get_ref(dp, obj, tag, dsp); else err = ENOENT; if (err) goto out; err = dsl_dataset_hold_ref(*dsp, tag); /* we may be looking for a snapshot */ if (err == 0 && snapname != NULL) { dsl_dataset_t *ds = NULL; if (*snapname++ != '@') { dsl_dataset_rele(*dsp, tag); err = ENOENT; goto out; } dprintf("looking for snapshot '%s'\n", snapname); err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); if (err == 0) err = dsl_dataset_get_ref(dp, obj, tag, &ds); dsl_dataset_rele(*dsp, tag); ASSERT3U((err == 0), ==, (ds != NULL)); if (ds) { mutex_enter(&ds->ds_lock); if (ds->ds_snapname[0] == 0) (void) strlcpy(ds->ds_snapname, snapname, sizeof (ds->ds_snapname)); mutex_exit(&ds->ds_lock); err = dsl_dataset_hold_ref(ds, tag); *dsp = err ? NULL : ds; } } out: rw_exit(&dp->dp_config_rwlock); dsl_dir_close(dd, FTAG); return (err); } int dsl_dataset_own(const char *name, boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp) { int err = dsl_dataset_hold(name, tag, dsp); if (err) return (err); if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { dsl_dataset_rele(*dsp, tag); return (EBUSY); } return (0); } void dsl_dataset_name(dsl_dataset_t *ds, char *name) { if (ds == NULL) { (void) strcpy(name, "mos"); } else { dsl_dir_name(ds->ds_dir, name); VERIFY(0 == dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { (void) strcat(name, "@"); /* * We use a "recursive" mutex so that we * can call dprintf_ds() with ds_lock held. */ if (!MUTEX_HELD(&ds->ds_lock)) { mutex_enter(&ds->ds_lock); (void) strcat(name, ds->ds_snapname); mutex_exit(&ds->ds_lock); } else { (void) strcat(name, ds->ds_snapname); } } } } static int dsl_dataset_namelen(dsl_dataset_t *ds) { int result; if (ds == NULL) { result = 3; /* "mos" */ } else { result = dsl_dir_namelen(ds->ds_dir); VERIFY(0 == dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { ++result; /* adding one for the @-sign */ if (!MUTEX_HELD(&ds->ds_lock)) { mutex_enter(&ds->ds_lock); result += strlen(ds->ds_snapname); mutex_exit(&ds->ds_lock); } else { result += strlen(ds->ds_snapname); } } } return (result); } void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) { dmu_buf_rele(ds->ds_dbuf, tag); } void dsl_dataset_rele(dsl_dataset_t *ds, void *tag) { if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { rw_exit(&ds->ds_rwlock); } dsl_dataset_drop_ref(ds, tag); } void dsl_dataset_disown(dsl_dataset_t *ds, void *tag) { ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); mutex_enter(&ds->ds_lock); ds->ds_owner = NULL; if (RW_WRITE_HELD(&ds->ds_rwlock)) { rw_exit(&ds->ds_rwlock); cv_broadcast(&ds->ds_exclusive_cv); } mutex_exit(&ds->ds_lock); if (ds->ds_dbuf) dsl_dataset_drop_ref(ds, tag); else dsl_dataset_evict(NULL, ds); } boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) { boolean_t gotit = FALSE; mutex_enter(&ds->ds_lock); if (ds->ds_owner == NULL && (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { ds->ds_owner = tag; if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) rw_exit(&ds->ds_rwlock); gotit = TRUE; } mutex_exit(&ds->ds_lock); return (gotit); } void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) { ASSERT3P(owner, ==, ds->ds_owner); if (!RW_WRITE_HELD(&ds->ds_rwlock)) rw_enter(&ds->ds_rwlock, RW_WRITER); } uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; uint64_t dsobj; objset_t *mos = dp->dp_meta_objset; if (origin == NULL) origin = dp->dp_origin_snap; ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; bzero(dsphys, sizeof (dsl_dataset_phys_t)); dsphys->ds_dir_obj = dd->dd_object; dsphys->ds_flags = flags; dsphys->ds_fsid_guid = unique_create(); (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_snapnames_zapobj = zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; if (origin == NULL) { dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); } else { dsl_dataset_t *ohds; dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = origin->ds_phys->ds_creation_txg; dsphys->ds_used_bytes = origin->ds_phys->ds_used_bytes; dsphys->ds_compressed_bytes = origin->ds_phys->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = origin->ds_phys->ds_uncompressed_bytes; dsphys->ds_bp = origin->ds_phys->ds_bp; dsphys->ds_flags |= origin->ds_phys->ds_flags; dmu_buf_will_dirty(origin->ds_dbuf, tx); origin->ds_phys->ds_num_children++; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); dsl_dataset_rele(ohds, FTAG); if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { if (origin->ds_phys->ds_next_clones_obj == 0) { origin->ds_phys->ds_next_clones_obj = zap_create(mos, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY(0 == zap_add_int(mos, origin->ds_phys->ds_next_clones_obj, dsobj, tx)); } dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_origin_obj = origin->ds_object; if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { if (origin->ds_dir->dd_phys->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); origin->ds_dir->dd_phys->dd_clones = zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY3U(0, ==, zap_add_int(mos, origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); } } if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; dmu_buf_rele(dbuf, FTAG); dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_head_dataset_obj = dsobj; return (dsobj); } uint64_t dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) { dsl_pool_t *dp = pdd->dd_pool; uint64_t dsobj, ddobj; dsl_dir_t *dd; ASSERT(lastname[0] != '@'); ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); dsl_deleg_set_create_perms(dd, tx, cr); dsl_dir_close(dd, FTAG); /* * If we are creating a clone, make sure we zero out any stale * data from the origin snapshots zil header. */ if (origin != NULL) { dsl_dataset_t *ds; objset_t *os; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); bzero(&os->os_zil_header, sizeof (os->os_zil_header)); dsl_dataset_dirty(ds, tx); dsl_dataset_rele(ds, FTAG); } return (dsobj); } -struct destroyarg { - dsl_sync_task_group_t *dstg; - char *snapname; - char *failed; - boolean_t defer; -}; - -static int -dsl_snapshot_destroy_one(const char *name, void *arg) -{ - struct destroyarg *da = arg; - dsl_dataset_t *ds; - int err; - char *dsname; - - dsname = kmem_asprintf("%s@%s", name, da->snapname); - err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds); - strfree(dsname); - if (err == 0) { - struct dsl_ds_destroyarg *dsda; - - dsl_dataset_make_exclusive(ds, da->dstg); - dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); - dsda->ds = ds; - dsda->defer = da->defer; - dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, - dsl_dataset_destroy_sync, dsda, da->dstg, 0); - } else if (err == ENOENT) { - err = 0; - } else { - (void) strcpy(da->failed, name); - } - return (err); -} - /* - * Destroy 'snapname' in all descendants of 'fsname'. + * The snapshots must all be in the same pool. */ -#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy int -dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer) +dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) { int err; - struct destroyarg da; dsl_sync_task_t *dst; spa_t *spa; + nvpair_t *pair; + dsl_sync_task_group_t *dstg; - err = spa_open(fsname, &spa, FTAG); + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); + + err = spa_open(nvpair_name(pair), &spa, FTAG); if (err) return (err); - da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - da.snapname = snapname; - da.failed = fsname; - da.defer = defer; + dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - err = dmu_objset_find(fsname, - dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + dsl_dataset_t *ds; + int err; + + err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds); + if (err == 0) { + struct dsl_ds_destroyarg *dsda; + + dsl_dataset_make_exclusive(ds, dstg); + dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), + KM_SLEEP); + dsda->ds = ds; + dsda->defer = defer; + dsl_sync_task_create(dstg, dsl_dataset_destroy_check, + dsl_dataset_destroy_sync, dsda, dstg, 0); + } else if (err == ENOENT) { + err = 0; + } else { + (void) strcpy(failed, nvpair_name(pair)); + break; + } + } if (err == 0) - err = dsl_sync_task_group_wait(da.dstg); + err = dsl_sync_task_group_wait(dstg); - for (dst = list_head(&da.dstg->dstg_tasks); dst; - dst = list_next(&da.dstg->dstg_tasks, dst)) { + for (dst = list_head(&dstg->dstg_tasks); dst; + dst = list_next(&dstg->dstg_tasks, dst)) { struct dsl_ds_destroyarg *dsda = dst->dst_arg1; dsl_dataset_t *ds = dsda->ds; /* * Return the file system name that triggered the error */ if (dst->dst_err) { - dsl_dataset_name(ds, fsname); - *strchr(fsname, '@') = '\0'; + dsl_dataset_name(ds, failed); } ASSERT3P(dsda->rm_origin, ==, NULL); - dsl_dataset_disown(ds, da.dstg); + dsl_dataset_disown(ds, dstg); kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); } - dsl_sync_task_group_destroy(da.dstg); + dsl_sync_task_group_destroy(dstg); spa_close(spa, FTAG); return (err); + } static boolean_t dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) { boolean_t might_destroy = B_FALSE; mutex_enter(&ds->ds_lock); if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && DS_IS_DEFER_DESTROY(ds)) might_destroy = B_TRUE; mutex_exit(&ds->ds_lock); return (might_destroy); } /* * If we're removing a clone, and these three conditions are true: * 1) the clone's origin has no other children * 2) the clone's origin has no user references * 3) the clone's origin has been marked for deferred destruction * Then, prepare to remove the origin as part of this sync task group. */ static int dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) { dsl_dataset_t *ds = dsda->ds; dsl_dataset_t *origin = ds->ds_prev; if (dsl_dataset_might_destroy_origin(origin)) { char *name; int namelen; int error; namelen = dsl_dataset_namelen(origin) + 1; name = kmem_alloc(namelen, KM_SLEEP); dsl_dataset_name(origin, name); #ifdef _KERNEL error = zfs_unmount_snap(name, NULL); if (error) { kmem_free(name, namelen); return (error); } #endif error = dsl_dataset_own(name, B_TRUE, tag, &origin); kmem_free(name, namelen); if (error) return (error); dsda->rm_origin = origin; dsl_dataset_make_exclusive(origin, tag); } return (0); } /* * ds must be opened as OWNER. On return (whether successful or not), * ds will be closed and caller can no longer dereference it. */ int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) { int err; dsl_sync_task_group_t *dstg; objset_t *os; dsl_dir_t *dd; uint64_t obj; struct dsl_ds_destroyarg dsda = { 0 }; dsl_dataset_t *dummy_ds; dsda.ds = ds; if (dsl_dataset_is_snapshot(ds)) { /* Destroying a snapshot is simpler */ dsl_dataset_make_exclusive(ds, tag); dsda.defer = defer; err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); ASSERT3P(dsda.rm_origin, ==, NULL); goto out; } else if (defer) { err = EINVAL; goto out; } dd = ds->ds_dir; dummy_ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); dummy_ds->ds_dir = dd; dummy_ds->ds_object = ds->ds_object; /* * Check for errors and mark this ds as inconsistent, in * case we crash while freeing the objects. */ err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, dsl_dataset_destroy_begin_sync, ds, NULL, 0); if (err) goto out_free; err = dmu_objset_from_ds(ds, &os); if (err) goto out_free; /* * remove the objects in open context, so that we won't * have too much to do in syncing context. */ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, ds->ds_phys->ds_prev_snap_txg)) { /* * Ignore errors, if there is not enough disk space * we will deal with it in dsl_dataset_destroy_sync(). */ (void) dmu_free_object(os, obj); } if (err != ESRCH) goto out_free; /* * Only the ZIL knows how to free log blocks. */ zil_destroy(dmu_objset_zil(os), B_FALSE); /* * Sync out all in-flight IO. */ txg_wait_synced(dd->dd_pool, 0); /* * If we managed to free all the objects in open * context, the user space accounting should be zero. */ if (ds->ds_phys->ds_bp.blk_fill == 0 && dmu_objset_userused_enabled(os)) { ASSERTV(uint64_t count); ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || count == 0); ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || count == 0); } rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); rw_exit(&dd->dd_pool->dp_config_rwlock); if (err) goto out_free; /* * Blow away the dsl_dir + head dataset. */ dsl_dataset_make_exclusive(ds, tag); /* * If we're removing a clone, we might also need to remove its * origin. */ do { dsda.need_prep = B_FALSE; if (dsl_dir_is_clone(dd)) { err = dsl_dataset_origin_rm_prep(&dsda, tag); if (err) { dsl_dir_close(dd, FTAG); goto out_free; } } dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, dsl_dir_destroy_sync, dummy_ds, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); /* * We could be racing against 'zfs release' or 'zfs destroy -d' * on the origin snap, in which case we can get EBUSY if we * needed to destroy the origin snap but were not ready to * do so. */ if (dsda.need_prep) { ASSERT(err == EBUSY); ASSERT(dsl_dir_is_clone(dd)); ASSERT(dsda.rm_origin == NULL); } } while (dsda.need_prep); if (dsda.rm_origin != NULL) dsl_dataset_disown(dsda.rm_origin, tag); /* if it is successful, dsl_dir_destroy_sync will close the dd */ if (err) dsl_dir_close(dd, FTAG); out_free: kmem_free(dummy_ds, sizeof (dsl_dataset_t)); out: dsl_dataset_disown(ds, tag); return (err); } blkptr_t * dsl_dataset_get_blkptr(dsl_dataset_t *ds) { return (&ds->ds_phys->ds_bp); } void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); /* If it's the meta-objset, set dp_meta_rootbp */ if (ds == NULL) { tx->tx_pool->dp_meta_rootbp = *bp; } else { dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_bp = *bp; } } spa_t * dsl_dataset_get_spa(dsl_dataset_t *ds) { return (ds->ds_dir->dd_pool->dp_spa); } void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_pool_t *dp; if (ds == NULL) /* this is the meta-objset */ return; ASSERT(ds->ds_objset != NULL); if (ds->ds_phys->ds_next_snap_obj != 0) panic("dirtying snapshot!"); dp = ds->ds_dir->dd_pool; if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, ds); } } /* * The unique space in the head dataset can be calculated by subtracting * the space used in the most recent snapshot, that is still being used * in this file system, from the space currently in use. To figure out * the space in the most recent snapshot still in use, we need to take * the total space used in the snapshot and subtract out the space that * has been freed up since the snapshot was taken. */ static void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) { uint64_t mrs_used; uint64_t dlused, dlcomp, dluncomp; ASSERT(!dsl_dataset_is_snapshot(ds)); if (ds->ds_phys->ds_prev_snap_obj != 0) mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; else mrs_used = 0; dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); ASSERT3U(dlused, <=, mrs_used); ds->ds_phys->ds_unique_bytes = ds->ds_phys->ds_used_bytes - (mrs_used - dlused); if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; } struct killarg { dsl_dataset_t *ds; dmu_tx_t *tx; }; /* ARGSUSED */ static int kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { struct killarg *ka = arg; dmu_tx_t *tx = ka->tx; if (bp == NULL) return (0); if (zb->zb_level == ZB_ZIL_LEVEL) { ASSERT(zilog != NULL); /* * It's a block in the intent log. It has no * accounting, so just free it. */ dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); } else { ASSERT(zilog == NULL); ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); } return (0); } /* ARGSUSED */ static int dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t count; int err; /* * Can't delete a head dataset if there are snapshots of it. * (Except if the only snapshots are from the branch we cloned * from.) */ if (ds->ds_prev != NULL && ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) return (EBUSY); /* * This is really a dsl_dir thing, but check it here so that * we'll be less likely to leave this dataset inconsistent & * nearly destroyed. */ err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); if (err) return (err); if (count != 0) return (EEXIST); return (0); } /* ARGSUSED */ static void dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_pool_t *dp = ds->ds_dir->dd_pool; /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, "dataset = %llu", ds->ds_object); } static int dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, dmu_tx_t *tx) { dsl_dataset_t *ds = dsda->ds; dsl_dataset_t *ds_prev = ds->ds_prev; if (dsl_dataset_might_destroy_origin(ds_prev)) { struct dsl_ds_destroyarg ndsda = {0}; /* * If we're not prepared to remove the origin, don't remove * the clone either. */ if (dsda->rm_origin == NULL) { dsda->need_prep = B_TRUE; return (EBUSY); } ndsda.ds = ds_prev; ndsda.is_origin_rm = B_TRUE; return (dsl_dataset_destroy_check(&ndsda, tag, tx)); } /* * If we're not going to remove the origin after all, * undo the open context setup. */ if (dsda->rm_origin != NULL) { dsl_dataset_disown(dsda->rm_origin, tag); dsda->rm_origin = NULL; } return (0); } /* * If you add new checks here, you may need to add * additional checks to the "temporary" case in * snapshot_check() in dmu_objset.c. */ /* ARGSUSED */ int dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { struct dsl_ds_destroyarg *dsda = arg1; dsl_dataset_t *ds = dsda->ds; /* we have an owner hold, so noone else can destroy us */ ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); /* * Only allow deferred destroy on pools that support it. * NOTE: deferred destroy is only supported on snapshots. */ if (dsda->defer) { if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) return (ENOTSUP); ASSERT(dsl_dataset_is_snapshot(ds)); return (0); } /* * Can't delete a head dataset if there are snapshots of it. * (Except if the only snapshots are from the branch we cloned * from.) */ if (ds->ds_prev != NULL && ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) return (EBUSY); /* * If we made changes this txg, traverse_dsl_dataset won't find * them. Try again. */ if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) return (EAGAIN); if (dsl_dataset_is_snapshot(ds)) { /* * If this snapshot has an elevated user reference count, * we can't destroy it yet. */ if (ds->ds_userrefs > 0 && !dsda->releasing) return (EBUSY); mutex_enter(&ds->ds_lock); /* * Can't delete a branch point. However, if we're destroying * a clone and removing its origin due to it having a user * hold count of 0 and having been marked for deferred destroy, * it's OK for the origin to have a single clone. */ if (ds->ds_phys->ds_num_children > (dsda->is_origin_rm ? 2 : 1)) { mutex_exit(&ds->ds_lock); return (EEXIST); } mutex_exit(&ds->ds_lock); } else if (dsl_dir_is_clone(ds->ds_dir)) { return (dsl_dataset_origin_check(dsda, arg2, tx)); } /* XXX we should do some i/o error checking... */ return (0); } struct refsarg { kmutex_t lock; boolean_t gone; kcondvar_t cv; }; /* ARGSUSED */ static void dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) { struct refsarg *arg = argv; mutex_enter(&arg->lock); arg->gone = TRUE; cv_signal(&arg->cv); mutex_exit(&arg->lock); } static void dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) { struct refsarg arg; mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); arg.gone = FALSE; (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, dsl_dataset_refs_gone); dmu_buf_rele(ds->ds_dbuf, tag); mutex_enter(&arg.lock); while (!arg.gone) cv_wait(&arg.cv, &arg.lock); ASSERT(arg.gone); mutex_exit(&arg.lock); ds->ds_dbuf = NULL; ds->ds_phys = NULL; mutex_destroy(&arg.lock); cv_destroy(&arg.cv); } static void remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int err; ASSERTV(uint64_t count); ASSERT(ds->ds_phys->ds_num_children >= 2); err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); /* * The err should not be ENOENT, but a bug in a previous version * of the code could cause upgrade_clones_cb() to not set * ds_next_snap_obj when it should, leading to a missing entry. * If we knew that the pool was created after * SPA_VERSION_NEXT_CLONES, we could assert that it isn't * ENOENT. However, at least we can check that we don't have * too many entries in the next_clones_obj even after failing to * remove this one. */ if (err != ENOENT) { VERIFY3U(err, ==, 0); } ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, &count)); ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); } static void dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; zap_cursor_t zc; zap_attribute_t za; /* * If it is the old version, dd_clones doesn't exist so we can't * find the clones, but deadlist_remove_key() is a no-op so it * doesn't matter. */ if (ds->ds_dir->dd_phys->dd_clones == 0) return; for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, za.za_first_integer, FTAG, &clone)); if (clone->ds_dir->dd_origin_txg > mintxg) { dsl_deadlist_remove_key(&clone->ds_deadlist, mintxg, tx); dsl_dataset_remove_clones_key(clone, mintxg, tx); } dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(&zc); } struct process_old_arg { dsl_dataset_t *ds; dsl_dataset_t *ds_prev; boolean_t after_branch_point; zio_t *pio; uint64_t used, comp, uncomp; }; static int process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { struct process_old_arg *poa = arg; dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); if (poa->ds_prev && !poa->after_branch_point && bp->blk_birth > poa->ds_prev->ds_phys->ds_prev_snap_txg) { poa->ds_prev->ds_phys->ds_unique_bytes += bp_get_dsize_sync(dp->dp_spa, bp); } } else { poa->used += bp_get_dsize_sync(dp->dp_spa, bp); poa->comp += BP_GET_PSIZE(bp); poa->uncomp += BP_GET_UCSIZE(bp); dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); } return (0); } static void process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) { struct process_old_arg poa = { 0 }; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; ASSERT(ds->ds_deadlist.dl_oldfmt); ASSERT(ds_next->ds_deadlist.dl_oldfmt); poa.ds = ds; poa.ds_prev = ds_prev; poa.after_branch_point = after_branch_point; poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, process_old_cb, &poa, tx)); VERIFY3U(zio_wait(poa.pio), ==, 0); ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); /* change snapused */ dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, -poa.used, -poa.comp, -poa.uncomp, tx); /* swap next's deadlist to our deadlist */ dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_close(&ds_next->ds_deadlist); SWITCH64(ds_next->ds_phys->ds_deadlist_obj, ds->ds_phys->ds_deadlist_obj); dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); dsl_deadlist_open(&ds_next->ds_deadlist, mos, ds_next->ds_phys->ds_deadlist_obj); } void dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { struct dsl_ds_destroyarg *dsda = arg1; dsl_dataset_t *ds = dsda->ds; int err; int after_branch_point = FALSE; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; dsl_dataset_t *ds_prev = NULL; boolean_t wont_destroy; uint64_t obj; wont_destroy = (dsda->defer && (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)); ASSERT(ds->ds_owner || wont_destroy); ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); ASSERT(ds->ds_prev == NULL || ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); if (wont_destroy) { ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; return; } /* signal any waiters that this dataset is going away */ mutex_enter(&ds->ds_lock); ds->ds_owner = dsl_reaper; cv_broadcast(&ds->ds_exclusive_cv); mutex_exit(&ds->ds_lock); /* Remove our reservation */ if (ds->ds_reserved != 0) { dsl_prop_setarg_t psa; uint64_t value = 0; dsl_prop_setarg_init_uint64(&psa, "refreservation", (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), &value); psa.psa_effective_value = 0; /* predict default value */ dsl_dataset_set_reservation_sync(ds, &psa, tx); ASSERT3U(ds->ds_reserved, ==, 0); } ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); dsl_scan_ds_destroyed(ds, tx); obj = ds->ds_object; if (ds->ds_phys->ds_prev_snap_obj != 0) { if (ds->ds_prev) { ds_prev = ds->ds_prev; } else { VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); } after_branch_point = (ds_prev->ds_phys->ds_next_snap_obj != obj); dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); if (after_branch_point && ds_prev->ds_phys->ds_next_clones_obj != 0) { remove_from_next_clones(ds_prev, obj, tx); if (ds->ds_phys->ds_next_snap_obj != 0) { VERIFY(0 == zap_add_int(mos, ds_prev->ds_phys->ds_next_clones_obj, ds->ds_phys->ds_next_snap_obj, tx)); } } if (after_branch_point && ds->ds_phys->ds_next_snap_obj == 0) { /* This clone is toast. */ ASSERT(ds_prev->ds_phys->ds_num_children > 1); ds_prev->ds_phys->ds_num_children--; /* * If the clone's origin has no other clones, no * user holds, and has been marked for deferred * deletion, then we should have done the necessary * destroy setup for it. */ if (ds_prev->ds_phys->ds_num_children == 1 && ds_prev->ds_userrefs == 0 && DS_IS_DEFER_DESTROY(ds_prev)) { ASSERT3P(dsda->rm_origin, !=, NULL); } else { ASSERT3P(dsda->rm_origin, ==, NULL); } } else if (!after_branch_point) { ds_prev->ds_phys->ds_next_snap_obj = ds->ds_phys->ds_next_snap_obj; } } if (dsl_dataset_is_snapshot(ds)) { dsl_dataset_t *ds_next; uint64_t old_unique; uint64_t used = 0, comp = 0, uncomp = 0; VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); old_unique = ds_next->ds_phys->ds_unique_bytes; dmu_buf_will_dirty(ds_next->ds_dbuf, tx); ds_next->ds_phys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; ds_next->ds_phys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); if (ds_next->ds_deadlist.dl_oldfmt) { process_old_deadlist(ds, ds_prev, ds_next, after_branch_point, tx); } else { /* Adjust prev's unique space. */ if (ds_prev && !after_branch_point) { dsl_deadlist_space_range(&ds_next->ds_deadlist, ds_prev->ds_phys->ds_prev_snap_txg, ds->ds_phys->ds_prev_snap_txg, &used, &comp, &uncomp); ds_prev->ds_phys->ds_unique_bytes += used; } /* Adjust snapused. */ dsl_deadlist_space_range(&ds_next->ds_deadlist, ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &used, &comp, &uncomp); dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, -used, -comp, -uncomp, tx); /* Move blocks to be freed to pool's free list. */ dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, tx); dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, used, comp, uncomp, tx); dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx); /* Merge our deadlist into next's and free it. */ dsl_deadlist_merge(&ds_next->ds_deadlist, ds->ds_phys->ds_deadlist_obj, tx); } dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); /* Collapse range in clone heads */ dsl_dataset_remove_clones_key(ds, ds->ds_phys->ds_creation_txg, tx); if (dsl_dataset_is_snapshot(ds_next)) { dsl_dataset_t *ds_nextnext; dsl_dataset_t *hds; /* * Update next's unique to include blocks which * were previously shared by only this snapshot * and it. Those blocks will be born after the * prev snap and before this snap, and will have * died after the next snap and before the one * after that (ie. be on the snap after next's * deadlist). */ VERIFY(0 == dsl_dataset_hold_obj(dp, ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext)); dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, ds->ds_phys->ds_prev_snap_txg, ds->ds_phys->ds_creation_txg, &used, &comp, &uncomp); ds_next->ds_phys->ds_unique_bytes += used; dsl_dataset_rele(ds_nextnext, FTAG); ASSERT3P(ds_next->ds_prev, ==, NULL); /* Collapse range in this head. */ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds)); dsl_deadlist_remove_key(&hds->ds_deadlist, ds->ds_phys->ds_creation_txg, tx); dsl_dataset_rele(hds, FTAG); } else { ASSERT3P(ds_next->ds_prev, ==, ds); dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); ds_next->ds_prev = NULL; if (ds_prev) { VERIFY(0 == dsl_dataset_get_ref(dp, ds->ds_phys->ds_prev_snap_obj, ds_next, &ds_next->ds_prev)); } dsl_dataset_recalc_head_uniq(ds_next); /* * Reduce the amount of our unconsmed refreservation * being charged to our parent by the amount of * new unique data we have gained. */ if (old_unique < ds_next->ds_reserved) { int64_t mrsdelta; uint64_t new_unique = ds_next->ds_phys->ds_unique_bytes; ASSERT(old_unique <= new_unique); mrsdelta = MIN(new_unique - old_unique, ds_next->ds_reserved - old_unique); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); } } dsl_dataset_rele(ds_next, FTAG); } else { /* * There's no next snapshot, so this is a head dataset. * Destroy the deadlist. Unless it's a clone, the * deadlist should be empty. (If it's a clone, it's * safe to ignore the deadlist contents.) */ struct killarg ka; dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); ds->ds_phys->ds_deadlist_obj = 0; /* * Free everything that we point to (that's born after * the previous snapshot, if we are a clone) * * NB: this should be very quick, because we already * freed all the objects in open context. */ ka.ds = ds; ka.tx = tx; err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, kill_blkptr, &ka); ASSERT3U(err, ==, 0); ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); if (ds->ds_prev != NULL) { if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { VERIFY3U(0, ==, zap_remove_int(mos, ds->ds_prev->ds_dir->dd_phys->dd_clones, ds->ds_object, tx)); } dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = ds_prev = NULL; } } /* * This must be done after the dsl_traverse(), because it will * re-open the objset. */ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { /* Erase the link in the dir */ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); ASSERT(err == 0); } else { /* remove from snapshot namespace */ dsl_dataset_t *ds_head; ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); VERIFY(0 == dsl_dataset_get_snapname(ds)); #ifdef ZFS_DEBUG { uint64_t val; err = dsl_dataset_snap_lookup(ds_head, ds->ds_snapname, &val); ASSERT3U(err, ==, 0); ASSERT3U(val, ==, obj); } #endif err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); ASSERT(err == 0); dsl_dataset_rele(ds_head, FTAG); } if (ds_prev && ds->ds_prev != ds_prev) dsl_dataset_rele(ds_prev, FTAG); spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, "dataset = %llu", ds->ds_object); if (ds->ds_phys->ds_next_clones_obj != 0) { ASSERTV(uint64_t count); ASSERT(0 == zap_count(mos, ds->ds_phys->ds_next_clones_obj, &count) && count == 0); VERIFY(0 == dmu_object_free(mos, ds->ds_phys->ds_next_clones_obj, tx)); } if (ds->ds_phys->ds_props_obj != 0) VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); if (ds->ds_phys->ds_userrefs_obj != 0) VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); dsl_dir_close(ds->ds_dir, ds); ds->ds_dir = NULL; dsl_dataset_drain_refs(ds, tag); VERIFY(0 == dmu_object_free(mos, obj, tx)); if (dsda->rm_origin) { /* * Remove the origin of the clone we just destroyed. */ struct dsl_ds_destroyarg ndsda = {0}; ndsda.ds = dsda->rm_origin; dsl_dataset_destroy_sync(&ndsda, tag, tx); } } static int dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) { uint64_t asize; if (!dmu_tx_is_syncing(tx)) return (0); /* * If there's an fs-only reservation, any blocks that might become * owned by the snapshot dataset must be accommodated by space * outside of the reservation. */ ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) return (ENOSPC); /* * Propogate any reserved space for this snapshot to other * snapshot checks in this sync group. */ if (asize > 0) dsl_dir_willuse_space(ds->ds_dir, asize, tx); return (0); } int dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; const char *snapname = arg2; int err; uint64_t value; /* * We don't allow multiple snapshots of the same txg. If there * is already one, try again. */ if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) return (EAGAIN); /* * Check for conflicting name snapshot name. */ err = dsl_dataset_snap_lookup(ds, snapname, &value); if (err == 0) return (EEXIST); if (err != ENOENT) return (err); /* * Check that the dataset's name is not too long. Name consists * of the dataset's length + 1 for the @-sign + snapshot name's length */ if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) return (ENAMETOOLONG); err = dsl_dataset_snapshot_reserve_space(ds, tx); if (err) return (err); ds->ds_trysnap_txg = tx->tx_txg; return (0); } void dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; const char *snapname = arg2; dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; uint64_t dsobj, crtxg; objset_t *mos = dp->dp_meta_objset; int err; ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); /* * The origin's ds_creation_txg has to be < TXG_INITIAL */ if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) crtxg = 1; else crtxg = tx->tx_txg; dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; bzero(dsphys, sizeof (dsl_dataset_phys_t)); dsphys->ds_dir_obj = ds->ds_dir->dd_object; dsphys->ds_fsid_guid = unique_create(); (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; dsphys->ds_next_snap_obj = ds->ds_object; dsphys->ds_num_children = 1; dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = crtxg; dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; dsphys->ds_flags = ds->ds_phys->ds_flags; dsphys->ds_bp = ds->ds_phys->ds_bp; dmu_buf_rele(dbuf, FTAG); ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); if (ds->ds_prev) { uint64_t next_clones_obj = ds->ds_prev->ds_phys->ds_next_clones_obj; ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object || ds->ds_prev->ds_phys->ds_num_children > 1); if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, ds->ds_prev->ds_phys->ds_creation_txg); ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; } else if (next_clones_obj != 0) { remove_from_next_clones(ds->ds_prev, dsphys->ds_next_snap_obj, tx); VERIFY3U(0, ==, zap_add_int(mos, next_clones_obj, dsobj, tx)); } } /* * If we have a reference-reservation on this dataset, we will * need to increase the amount of refreservation being charged * since our unique space is going to zero. */ if (ds->ds_reserved) { int64_t delta; ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); } dmu_buf_will_dirty(ds->ds_dbuf, tx); zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", ds->ds_dir->dd_myname, snapname, dsobj, ds->ds_phys->ds_prev_snap_txg); ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); dsl_deadlist_add_key(&ds->ds_deadlist, ds->ds_phys->ds_prev_snap_txg, tx); ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); ds->ds_phys->ds_prev_snap_obj = dsobj; ds->ds_phys->ds_prev_snap_txg = crtxg; ds->ds_phys->ds_unique_bytes = 0; if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &dsobj, tx); ASSERT(err == 0); if (ds->ds_prev) dsl_dataset_drop_ref(ds->ds_prev, ds); VERIFY(0 == dsl_dataset_get_ref(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); dsl_scan_ds_snapshotted(ds, tx); dsl_dir_snap_cmtime_update(ds->ds_dir); spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, "dataset = %llu", dsobj); } void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(ds->ds_objset != NULL); ASSERT(ds->ds_phys->ds_next_snap_obj == 0); /* * in case we had to change ds_fsid_guid when we opened it, * sync it out now. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; dsl_dir_dirty(ds->ds_dir, tx); dmu_objset_sync(ds->ds_objset, zio, tx); } +static void +get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) +{ + uint64_t count = 0; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t za; + nvlist_t *propval; + nvlist_t *val; + + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + /* + * There may me missing entries in ds_next_clones_obj + * due to a bug in a previous version of the code. + * Only trust it if it has the right number of entries. + */ + if (ds->ds_phys->ds_next_clones_obj != 0) { + ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, + &count)); + } + if (count != ds->ds_phys->ds_num_children - 1) { + goto fail; + } + for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_dataset_t *clone; + char buf[ZFS_MAXNAMELEN]; + if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + za.za_first_integer, FTAG, &clone) != 0) { + goto fail; + } + dsl_dir_name(clone->ds_dir, buf); + VERIFY(nvlist_add_boolean(val, buf) == 0); + dsl_dataset_rele(clone, FTAG); + } + zap_cursor_fini(&zc); + VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); + VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), + propval) == 0); +fail: + nvlist_free(val); + nvlist_free(propval); + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); +} + void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { uint64_t refd, avail, uobjs, aobjs, ratio; dsl_dir_stats(ds->ds_dir, nv); dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, ds->ds_phys->ds_creation_time); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, ds->ds_phys->ds_creation_txg); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, ds->ds_quota); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, ds->ds_reserved); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, ds->ds_phys->ds_guid); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, ds->ds_phys->ds_unique_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, ds->ds_object); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, ds->ds_userrefs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, DS_IS_DEFER_DESTROY(ds) ? 1 : 0); + if (ds->ds_phys->ds_prev_snap_obj != 0) { + uint64_t written, comp, uncomp; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dsl_dataset_t *prev; + int err; + + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); + rw_exit(&dp->dp_config_rwlock); + if (err == 0) { + err = dsl_dataset_space_written(prev, ds, &written, + &comp, &uncomp); + dsl_dataset_rele(prev, FTAG); + if (err == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, + written); + } + } + } + ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : (ds->ds_phys->ds_uncompressed_bytes * 100 / ds->ds_phys->ds_compressed_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); if (ds->ds_phys->ds_next_snap_obj) { /* * This is a snapshot; override the dd's space used with * our unique space and compression ratio. */ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, ds->ds_phys->ds_unique_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); + + get_clones_stat(ds, nv); } } void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) { stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_next_snap_obj) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; } /* clone origin is really a dsl_dir thing... */ rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); if (dsl_dir_is_clone(ds->ds_dir)) { dsl_dataset_t *ods; VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); dsl_dataset_name(ods, stat->dds_origin); dsl_dataset_drop_ref(ods, FTAG); } else { stat->dds_origin[0] = '\0'; } rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds) { return (ds->ds_fsid_guid); } void dsl_dataset_space(dsl_dataset_t *ds, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp) { *refdbytesp = ds->ds_phys->ds_used_bytes; *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; if (ds->ds_quota != 0) { /* * Adjust available bytes according to refquota */ if (*refdbytesp < ds->ds_quota) *availbytesp = MIN(*availbytesp, ds->ds_quota - *refdbytesp); else *availbytesp = 0; } *usedobjsp = ds->ds_phys->ds_bp.blk_fill; *availobjsp = DN_MAX_OBJECT - *usedobjsp; } boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) { ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool); ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || dsl_pool_sync_context(dp)); if (ds->ds_prev == NULL) return (B_FALSE); if (ds->ds_phys->ds_bp.blk_birth > ds->ds_prev->ds_phys->ds_creation_txg) { objset_t *os, *os_prev; /* * It may be that only the ZIL differs, because it was * reset in the head. Don't count that as being * modified. */ if (dmu_objset_from_ds(ds, &os) != 0) return (B_TRUE); if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0) return (B_TRUE); return (bcmp(&os->os_phys->os_meta_dnode, &os_prev->os_phys->os_meta_dnode, sizeof (os->os_phys->os_meta_dnode)) != 0); } return (B_FALSE); } /* ARGSUSED */ static int dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; char *newsnapname = arg2; dsl_dir_t *dd = ds->ds_dir; dsl_dataset_t *hds; uint64_t val; int err; err = dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); if (err) return (err); /* new name better not be in use */ err = dsl_dataset_snap_lookup(hds, newsnapname, &val); dsl_dataset_rele(hds, FTAG); if (err == 0) err = EEXIST; else if (err == ENOENT) err = 0; /* dataset name + 1 for the "@" + the new snapshot name must fit */ if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) err = ENAMETOOLONG; return (err); } static void dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; const char *newsnapname = arg2; dsl_dir_t *dd = ds->ds_dir; objset_t *mos = dd->dd_pool->dp_meta_objset; dsl_dataset_t *hds; int err; ASSERT(ds->ds_phys->ds_next_snap_obj != 0); VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); VERIFY(0 == dsl_dataset_get_snapname(ds)); err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); ASSERT3U(err, ==, 0); mutex_enter(&ds->ds_lock); (void) strcpy(ds->ds_snapname, newsnapname); mutex_exit(&ds->ds_lock); err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx); ASSERT3U(err, ==, 0); spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, "dataset = %llu", ds->ds_object); dsl_dataset_rele(hds, FTAG); } struct renamesnaparg { dsl_sync_task_group_t *dstg; char failed[MAXPATHLEN]; char *oldsnap; char *newsnap; }; static int dsl_snapshot_rename_one(const char *name, void *arg) { struct renamesnaparg *ra = arg; dsl_dataset_t *ds = NULL; char *snapname; int err; snapname = kmem_asprintf("%s@%s", name, ra->oldsnap); (void) strlcpy(ra->failed, snapname, sizeof (ra->failed)); /* * For recursive snapshot renames the parent won't be changing * so we just pass name for both the to/from argument. */ err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); if (err != 0) { strfree(snapname); return (err == ENOENT ? 0 : err); } #ifdef _KERNEL /* * For all filesystems undergoing rename, we'll need to unmount it. */ (void) zfs_unmount_snap(snapname, NULL); #endif err = dsl_dataset_hold(snapname, ra->dstg, &ds); strfree(snapname); if (err != 0) return (err == ENOENT ? 0 : err); dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); return (0); } static int dsl_recursive_rename(char *oldname, const char *newname) { int err; struct renamesnaparg *ra; dsl_sync_task_t *dst; spa_t *spa; char *cp, *fsname = spa_strdup(oldname); int len = strlen(oldname) + 1; /* truncate the snapshot name to get the fsname */ cp = strchr(fsname, '@'); *cp = '\0'; err = spa_open(fsname, &spa, FTAG); if (err) { kmem_free(fsname, len); return (err); } ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ra->oldsnap = strchr(oldname, '@') + 1; ra->newsnap = strchr(newname, '@') + 1; *ra->failed = '\0'; err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, DS_FIND_CHILDREN); kmem_free(fsname, len); if (err == 0) { err = dsl_sync_task_group_wait(ra->dstg); } for (dst = list_head(&ra->dstg->dstg_tasks); dst; dst = list_next(&ra->dstg->dstg_tasks, dst)) { dsl_dataset_t *ds = dst->dst_arg1; if (dst->dst_err) { dsl_dir_name(ds->ds_dir, ra->failed); (void) strlcat(ra->failed, "@", sizeof (ra->failed)); (void) strlcat(ra->failed, ra->newsnap, sizeof (ra->failed)); } dsl_dataset_rele(ds, ra->dstg); } if (err) (void) strlcpy(oldname, ra->failed, sizeof (ra->failed)); dsl_sync_task_group_destroy(ra->dstg); kmem_free(ra, sizeof (struct renamesnaparg)); spa_close(spa, FTAG); return (err); } static int dsl_valid_rename(const char *oldname, void *arg) { int delta = *(int *)arg; if (strlen(oldname) + delta >= MAXNAMELEN) return (ENAMETOOLONG); return (0); } #pragma weak dmu_objset_rename = dsl_dataset_rename int dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) { dsl_dir_t *dd; dsl_dataset_t *ds; const char *tail; int err; err = dsl_dir_open(oldname, FTAG, &dd, &tail); if (err) return (err); if (tail == NULL) { int delta = strlen(newname) - strlen(oldname); /* if we're growing, validate child name lengths */ if (delta > 0) err = dmu_objset_find(oldname, dsl_valid_rename, &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); if (err == 0) err = dsl_dir_rename(dd, newname); dsl_dir_close(dd, FTAG); return (err); } if (tail[0] != '@') { /* the name ended in a nonexistent component */ dsl_dir_close(dd, FTAG); return (ENOENT); } dsl_dir_close(dd, FTAG); /* new name must be snapshot in same filesystem */ tail = strchr(newname, '@'); if (tail == NULL) return (EINVAL); tail++; if (strncmp(oldname, newname, tail - newname) != 0) return (EXDEV); if (recursive) { err = dsl_recursive_rename(oldname, newname); } else { err = dsl_dataset_hold(oldname, FTAG, &ds); if (err) return (err); err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_snapshot_rename_check, dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); dsl_dataset_rele(ds, FTAG); } return (err); } struct promotenode { list_node_t link; dsl_dataset_t *ds; }; struct promotearg { list_t shared_snaps, origin_snaps, clone_snaps; dsl_dataset_t *origin_origin; uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; char *err_ds; }; static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); static int dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *hds = arg1; struct promotearg *pa = arg2; struct promotenode *snap = list_head(&pa->shared_snaps); dsl_dataset_t *origin_ds = snap->ds; int err; uint64_t unused; /* Check that it is a real clone */ if (!dsl_dir_is_clone(hds->ds_dir)) return (EINVAL); /* Since this is so expensive, don't do the preliminary check */ if (!dmu_tx_is_syncing(tx)) return (0); if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) return (EXDEV); /* compute origin's new unique space */ snap = list_tail(&pa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); dsl_deadlist_space_range(&snap->ds->ds_deadlist, origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique, &unused, &unused); /* * Walk the snapshots that we are moving * * Compute space to transfer. Consider the incremental changes * to used for each snapshot: * (my used) = (prev's used) + (blocks born) - (blocks killed) * So each snapshot gave birth to: * (blocks born) = (my used) - (prev's used) + (blocks killed) * So a sequence would look like: * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) * Which simplifies to: * uN + kN + kN-1 + ... + k1 + k0 * Note however, if we stop before we reach the ORIGIN we get: * uN + kN + kN-1 + ... + kM - uM-1 */ pa->used = origin_ds->ds_phys->ds_used_bytes; pa->comp = origin_ds->ds_phys->ds_compressed_bytes; pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; for (snap = list_head(&pa->shared_snaps); snap; snap = list_next(&pa->shared_snaps, snap)) { uint64_t val, dlused, dlcomp, dluncomp; dsl_dataset_t *ds = snap->ds; /* Check that the snapshot name does not conflict */ VERIFY(0 == dsl_dataset_get_snapname(ds)); err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); if (err == 0) { err = EEXIST; goto out; } if (err != ENOENT) goto out; /* The very first snapshot does not have a deadlist */ if (ds->ds_phys->ds_prev_snap_obj == 0) continue; dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); pa->used += dlused; pa->comp += dlcomp; pa->uncomp += dluncomp; } /* * If we are a clone of a clone then we never reached ORIGIN, * so we need to subtract out the clone origin's used space. */ if (pa->origin_origin) { pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; } /* Check that there is enough space here */ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, pa->used); if (err) return (err); /* * Compute the amounts of space that will be used by snapshots * after the promotion (for both origin and clone). For each, * it is the amount of space that will be on all of their * deadlists (that was not born before their new origin). */ if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { uint64_t space; /* * Note, typically this will not be a clone of a clone, * so dd_origin_txg will be < TXG_INITIAL, so * these snaplist_space() -> dsl_deadlist_space_range() * calls will be fast because they do not have to * iterate over all bps. */ snap = list_head(&pa->origin_snaps); err = snaplist_space(&pa->shared_snaps, snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap); if (err) return (err); err = snaplist_space(&pa->clone_snaps, snap->ds->ds_dir->dd_origin_txg, &space); if (err) return (err); pa->cloneusedsnap += space; } if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { err = snaplist_space(&pa->origin_snaps, origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); if (err) return (err); } return (0); out: pa->err_ds = snap->ds->ds_snapname; return (err); } static void dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *hds = arg1; struct promotearg *pa = arg2; struct promotenode *snap = list_head(&pa->shared_snaps); dsl_dataset_t *origin_ds = snap->ds; dsl_dataset_t *origin_head; dsl_dir_t *dd = hds->ds_dir; dsl_pool_t *dp = hds->ds_dir->dd_pool; dsl_dir_t *odd = NULL; uint64_t oldnext_obj; int64_t delta; ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); snap = list_head(&pa->origin_snaps); origin_head = snap->ds; /* * We need to explicitly open odd, since origin_ds's dd will be * changing. */ VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, NULL, FTAG, &odd)); /* change origin's next snap */ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; snap = list_tail(&pa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; /* change the origin's next clone */ if (origin_ds->ds_phys->ds_next_clones_obj) { remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, origin_ds->ds_phys->ds_next_clones_obj, oldnext_obj, tx)); } /* change origin */ dmu_buf_will_dirty(dd->dd_dbuf, tx); ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; dmu_buf_will_dirty(odd->dd_dbuf, tx); odd->dd_phys->dd_origin_obj = origin_ds->ds_object; origin_head->ds_dir->dd_origin_txg = origin_ds->ds_phys->ds_creation_txg; /* change dd_clone entries */ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, odd->dd_phys->dd_clones, hds->ds_object, tx)); VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, pa->origin_origin->ds_dir->dd_phys->dd_clones, hds->ds_object, tx)); VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, pa->origin_origin->ds_dir->dd_phys->dd_clones, origin_head->ds_object, tx)); if (dd->dd_phys->dd_clones == 0) { dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, dd->dd_phys->dd_clones, origin_head->ds_object, tx)); } /* move snapshots to this dir */ for (snap = list_head(&pa->shared_snaps); snap; snap = list_next(&pa->shared_snaps, snap)) { dsl_dataset_t *ds = snap->ds; /* unregister props as dsl_dir is changing */ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } /* move snap name entry */ VERIFY(0 == dsl_dataset_get_snapname(ds)); VERIFY(0 == dsl_dataset_snap_remove(origin_head, ds->ds_snapname, tx)); VERIFY(0 == zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); /* change containing dsl_dir */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); ds->ds_phys->ds_dir_obj = dd->dd_object; ASSERT3P(ds->ds_dir, ==, odd); dsl_dir_close(ds->ds_dir, ds); VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); /* move any clone references */ if (ds->ds_phys->ds_next_clones_obj && spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, dp->dp_meta_objset, ds->ds_phys->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *cnds; uint64_t o; if (za.za_first_integer == oldnext_obj) { /* * We've already moved the * origin's reference. */ continue; } VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &cnds)); o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; VERIFY3U(zap_remove_int(dp->dp_meta_objset, odd->dd_phys->dd_clones, o, tx), ==, 0); VERIFY3U(zap_add_int(dp->dp_meta_objset, dd->dd_phys->dd_clones, o, tx), ==, 0); dsl_dataset_rele(cnds, FTAG); } zap_cursor_fini(&zc); } ASSERT3U(dsl_prop_numcb(ds), ==, 0); } /* * Change space accounting. * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either * both be valid, or both be 0 (resulting in delta == 0). This * is true for each of {clone,origin} independently. */ delta = pa->cloneusedsnap - dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; ASSERT3S(delta, >=, 0); ASSERT3U(pa->used, >=, delta); dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); dsl_dir_diduse_space(dd, DD_USED_HEAD, pa->used - delta, pa->comp, pa->uncomp, tx); delta = pa->originusedsnap - odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; ASSERT3S(delta, <=, 0); ASSERT3U(pa->used, >=, -delta); dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); dsl_dir_diduse_space(odd, DD_USED_HEAD, -pa->used - delta, -pa->comp, -pa->uncomp, tx); origin_ds->ds_phys->ds_unique_bytes = pa->unique; /* log history record */ spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, "dataset = %llu", hds->ds_object); dsl_dir_close(odd, FTAG); } static char *snaplist_tag = "snaplist"; /* * Make a list of dsl_dataset_t's for the snapshots between first_obj * (exclusive) and last_obj (inclusive). The list will be in reverse * order (last_obj will be the list_head()). If first_obj == 0, do all * snapshots back to this dataset's origin. */ static int snaplist_make(dsl_pool_t *dp, boolean_t own, uint64_t first_obj, uint64_t last_obj, list_t *l) { uint64_t obj = last_obj; ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); list_create(l, sizeof (struct promotenode), offsetof(struct promotenode, link)); while (obj != first_obj) { dsl_dataset_t *ds; struct promotenode *snap; int err; if (own) { err = dsl_dataset_own_obj(dp, obj, 0, snaplist_tag, &ds); if (err == 0) dsl_dataset_make_exclusive(ds, snaplist_tag); } else { err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); } if (err == ENOENT) { /* lost race with snapshot destroy */ struct promotenode *last = list_tail(l); ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); obj = last->ds->ds_phys->ds_prev_snap_obj; continue; } else if (err) { return (err); } if (first_obj == 0) first_obj = ds->ds_dir->dd_phys->dd_origin_obj; snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); snap->ds = ds; list_insert_tail(l, snap); obj = ds->ds_phys->ds_prev_snap_obj; } return (0); } static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) { struct promotenode *snap; *spacep = 0; for (snap = list_head(l); snap; snap = list_next(l, snap)) { uint64_t used, comp, uncomp; dsl_deadlist_space_range(&snap->ds->ds_deadlist, mintxg, UINT64_MAX, &used, &comp, &uncomp); *spacep += used; } return (0); } static void snaplist_destroy(list_t *l, boolean_t own) { struct promotenode *snap; if (!l || !list_link_active(&l->list_head)) return; while ((snap = list_tail(l)) != NULL) { list_remove(l, snap); if (own) dsl_dataset_disown(snap->ds, snaplist_tag); else dsl_dataset_rele(snap->ds, snaplist_tag); kmem_free(snap, sizeof (struct promotenode)); } list_destroy(l); } /* * Promote a clone. Nomenclature note: * "clone" or "cds": the original clone which is being promoted * "origin" or "ods": the snapshot which is originally clone's origin * "origin head" or "ohds": the dataset which is the head * (filesystem/volume) for the origin * "origin origin": the origin of the origin's filesystem (typically * NULL, indicating that the clone is not a clone of a clone). */ int dsl_dataset_promote(const char *name, char *conflsnap) { dsl_dataset_t *ds; dsl_dir_t *dd; dsl_pool_t *dp; dmu_object_info_t doi; struct promotearg pa; struct promotenode *snap; int err; bzero(&pa, sizeof(struct promotearg)); err = dsl_dataset_hold(name, FTAG, &ds); if (err) return (err); dd = ds->ds_dir; dp = dd->dd_pool; err = dmu_object_info(dp->dp_meta_objset, ds->ds_phys->ds_snapnames_zapobj, &doi); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { dsl_dataset_rele(ds, FTAG); return (EINVAL); } /* * We are going to inherit all the snapshots taken before our * origin (i.e., our new origin will be our parent's origin). * Take ownership of them so that we can rename them into our * namespace. */ rw_enter(&dp->dp_config_rwlock, RW_READER); err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, &pa.shared_snaps); if (err != 0) goto out; err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); if (err != 0) goto out; snap = list_head(&pa.shared_snaps); ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); if (err != 0) goto out; if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { err = dsl_dataset_hold_obj(dp, snap->ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &pa.origin_origin); if (err != 0) goto out; } out: rw_exit(&dp->dp_config_rwlock); /* * Add in 128x the snapnames zapobj size, since we will be moving * a bunch of snapnames to the promoted ds, and dirtying their * bonus buffers. */ if (err == 0) { err = dsl_sync_task_do(dp, dsl_dataset_promote_check, dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blocks_512); if (err && pa.err_ds && conflsnap) (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN); } snaplist_destroy(&pa.shared_snaps, B_TRUE); snaplist_destroy(&pa.clone_snaps, B_FALSE); snaplist_destroy(&pa.origin_snaps, B_FALSE); if (pa.origin_origin) dsl_dataset_rele(pa.origin_origin, FTAG); dsl_dataset_rele(ds, FTAG); return (err); } struct cloneswaparg { dsl_dataset_t *cds; /* clone dataset */ dsl_dataset_t *ohds; /* origin's head dataset */ boolean_t force; int64_t unused_refres_delta; /* change in unconsumed refreservation */ }; /* ARGSUSED */ static int dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) { struct cloneswaparg *csa = arg1; /* they should both be heads */ if (dsl_dataset_is_snapshot(csa->cds) || dsl_dataset_is_snapshot(csa->ohds)) return (EINVAL); /* the branch point should be just before them */ if (csa->cds->ds_prev != csa->ohds->ds_prev) return (EINVAL); /* cds should be the clone (unless they are unrelated) */ if (csa->cds->ds_prev != NULL && csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && csa->ohds->ds_object != csa->cds->ds_prev->ds_phys->ds_next_snap_obj) return (EINVAL); /* the clone should be a child of the origin */ if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) return (EINVAL); /* ohds shouldn't be modified unless 'force' */ if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) return (ETXTBSY); /* adjust amount of any unconsumed refreservation */ csa->unused_refres_delta = (int64_t)MIN(csa->ohds->ds_reserved, csa->ohds->ds_phys->ds_unique_bytes) - (int64_t)MIN(csa->ohds->ds_reserved, csa->cds->ds_phys->ds_unique_bytes); if (csa->unused_refres_delta > 0 && csa->unused_refres_delta > dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) return (ENOSPC); if (csa->ohds->ds_quota != 0 && csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota) return (EDQUOT); return (0); } /* ARGSUSED */ static void dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) { struct cloneswaparg *csa = arg1; dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; ASSERT(csa->cds->ds_reserved == 0); ASSERT(csa->ohds->ds_quota == 0 || csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota); dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); if (csa->cds->ds_objset != NULL) { dmu_objset_evict(csa->cds->ds_objset); csa->cds->ds_objset = NULL; } if (csa->ohds->ds_objset != NULL) { dmu_objset_evict(csa->ohds->ds_objset); csa->ohds->ds_objset = NULL; } /* * Reset origin's unique bytes, if it exists. */ if (csa->cds->ds_prev) { dsl_dataset_t *origin = csa->cds->ds_prev; uint64_t comp, uncomp; dmu_buf_will_dirty(origin->ds_dbuf, tx); dsl_deadlist_space_range(&csa->cds->ds_deadlist, origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); } /* swap blkptrs */ { blkptr_t tmp; tmp = csa->ohds->ds_phys->ds_bp; csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; csa->cds->ds_phys->ds_bp = tmp; } /* set dd_*_bytes */ { int64_t dused, dcomp, duncomp; uint64_t cdl_used, cdl_comp, cdl_uncomp; uint64_t odl_used, odl_comp, odl_uncomp; ASSERT3U(csa->cds->ds_dir->dd_phys-> dd_used_breakdown[DD_USED_SNAP], ==, 0); dsl_deadlist_space(&csa->cds->ds_deadlist, &cdl_used, &cdl_comp, &cdl_uncomp); dsl_deadlist_space(&csa->ohds->ds_deadlist, &odl_used, &odl_comp, &odl_uncomp); dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - (csa->ohds->ds_phys->ds_used_bytes + odl_used); dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + cdl_uncomp - (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, dused, dcomp, duncomp, tx); dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, -dused, -dcomp, -duncomp, tx); /* * The difference in the space used by snapshots is the * difference in snapshot space due to the head's * deadlist (since that's the only thing that's * changing that affects the snapused). */ dsl_deadlist_space_range(&csa->cds->ds_deadlist, csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, &cdl_used, &cdl_comp, &cdl_uncomp); dsl_deadlist_space_range(&csa->ohds->ds_deadlist, csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, &odl_used, &odl_comp, &odl_uncomp); dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, DD_USED_HEAD, DD_USED_SNAP, tx); } /* swap ds_*_bytes */ SWITCH64(csa->ohds->ds_phys->ds_used_bytes, csa->cds->ds_phys->ds_used_bytes); SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, csa->cds->ds_phys->ds_compressed_bytes); SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, csa->cds->ds_phys->ds_uncompressed_bytes); SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, csa->cds->ds_phys->ds_unique_bytes); /* apply any parent delta for change in unconsumed refreservation */ dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, csa->unused_refres_delta, 0, 0, tx); /* * Swap deadlists. */ dsl_deadlist_close(&csa->cds->ds_deadlist); dsl_deadlist_close(&csa->ohds->ds_deadlist); SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, csa->cds->ds_phys->ds_deadlist_obj); dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, csa->cds->ds_phys->ds_deadlist_obj); dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, csa->ohds->ds_phys->ds_deadlist_obj); dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); } /* * Swap 'clone' with its origin head datasets. Used at the end of "zfs * recv" into an existing fs to swizzle the file system to the new * version, and by "zfs rollback". Can also be used to swap two * independent head datasets if neither has any snapshots. */ int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force) { struct cloneswaparg csa; int error; ASSERT(clone->ds_owner); ASSERT(origin_head->ds_owner); retry: /* * Need exclusive access for the swap. If we're swapping these * datasets back after an error, we already hold the locks. */ if (!RW_WRITE_HELD(&clone->ds_rwlock)) rw_enter(&clone->ds_rwlock, RW_WRITER); if (!RW_WRITE_HELD(&origin_head->ds_rwlock) && !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { rw_exit(&clone->ds_rwlock); rw_enter(&origin_head->ds_rwlock, RW_WRITER); if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { rw_exit(&origin_head->ds_rwlock); goto retry; } } csa.cds = clone; csa.ohds = origin_head; csa.force = force; error = dsl_sync_task_do(clone->ds_dir->dd_pool, dsl_dataset_clone_swap_check, dsl_dataset_clone_swap_sync, &csa, NULL, 9); return (error); } /* * Given a pool name and a dataset object number in that pool, * return the name of that dataset. */ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) { spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; int error; if ((error = spa_open(pname, &spa, FTAG)) != 0) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); } rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); return (error); } int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) { int error = 0; ASSERT3S(asize, >, 0); /* * *ref_rsrv is the portion of asize that will come from any * unconsumed refreservation space. */ *ref_rsrv = 0; mutex_enter(&ds->ds_lock); /* * Make a space adjustment for reserved bytes. */ if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { ASSERT3U(*used, >=, ds->ds_reserved - ds->ds_phys->ds_unique_bytes); *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); *ref_rsrv = asize - MIN(asize, parent_delta(ds, asize + inflight)); } if (!check_quota || ds->ds_quota == 0) { mutex_exit(&ds->ds_lock); return (0); } /* * If they are requesting more space, and our current estimate * is over quota, they get to try again unless the actual * on-disk is over quota and there are no pending changes (which * may free up space for us). */ if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) error = ERESTART; else error = EDQUOT; DMU_TX_STAT_BUMP(dmu_tx_quota); } mutex_exit(&ds->ds_lock); return (error); } /* ARGSUSED */ static int dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_prop_setarg_t *psa = arg2; int err; if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) return (ENOTSUP); if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) return (err); if (psa->psa_effective_value == 0) return (0); if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes || psa->psa_effective_value < ds->ds_reserved) return (ENOSPC); return (0); } extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); void dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_prop_setarg_t *psa = arg2; uint64_t effective_value = psa->psa_effective_value; dsl_prop_set_sync(ds, psa, tx); DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); if (ds->ds_quota != effective_value) { dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_quota = effective_value; spa_history_log_internal(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu ", (longlong_t)ds->ds_quota, ds->ds_object); } } int dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota) { dsl_dataset_t *ds; dsl_prop_setarg_t psa; int err; dsl_prop_setarg_init_uint64(&psa, "refquota", source, "a); err = dsl_dataset_hold(dsname, FTAG, &ds); if (err) return (err); /* * If someone removes a file, then tries to set the quota, we * want to make sure the file freeing takes effect. */ txg_wait_open(ds->ds_dir->dd_pool, 0); err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, ds, &psa, 0); dsl_dataset_rele(ds, FTAG); return (err); } static int dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_prop_setarg_t *psa = arg2; uint64_t effective_value; uint64_t unique; int err; if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFRESERVATION) return (ENOTSUP); if (dsl_dataset_is_snapshot(ds)) return (EINVAL); if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) return (err); effective_value = psa->psa_effective_value; /* * If we are doing the preliminary check in open context, the * space estimates may be inaccurate. */ if (!dmu_tx_is_syncing(tx)) return (0); mutex_enter(&ds->ds_lock); if (!DS_UNIQUE_IS_ACCURATE(ds)) dsl_dataset_recalc_head_uniq(ds); unique = ds->ds_phys->ds_unique_bytes; mutex_exit(&ds->ds_lock); if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) { uint64_t delta = MAX(unique, effective_value) - MAX(unique, ds->ds_reserved); if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) return (ENOSPC); if (ds->ds_quota > 0 && effective_value > ds->ds_quota) return (ENOSPC); } return (0); } static void dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_prop_setarg_t *psa = arg2; uint64_t effective_value = psa->psa_effective_value; uint64_t unique; int64_t delta; dsl_prop_set_sync(ds, psa, tx); DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); unique = ds->ds_phys->ds_unique_bytes; delta = MAX(0, (int64_t)(effective_value - unique)) - MAX(0, (int64_t)(ds->ds_reserved - unique)); ds->ds_reserved = effective_value; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); mutex_exit(&ds->ds_dir->dd_lock); spa_history_log_internal(LOG_DS_REFRESERV, ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu", (longlong_t)effective_value, ds->ds_object); } int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, uint64_t reservation) { dsl_dataset_t *ds; dsl_prop_setarg_t psa; int err; dsl_prop_setarg_init_uint64(&psa, "refreservation", source, &reservation); err = dsl_dataset_hold(dsname, FTAG, &ds); if (err) return (err); err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_set_reservation_check, dsl_dataset_set_reservation_sync, ds, &psa, 0); dsl_dataset_rele(ds, FTAG); return (err); } typedef struct zfs_hold_cleanup_arg { dsl_pool_t *dp; uint64_t dsobj; char htag[MAXNAMELEN]; } zfs_hold_cleanup_arg_t; static void dsl_dataset_user_release_onexit(void *arg) { zfs_hold_cleanup_arg_t *ca = arg; (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, B_TRUE); kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); } void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, minor_t minor) { zfs_hold_cleanup_arg_t *ca; ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); ca->dp = ds->ds_dir->dd_pool; ca->dsobj = ds->ds_object; (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); VERIFY3U(0, ==, zfs_onexit_add_cb(minor, dsl_dataset_user_release_onexit, ca, NULL)); } /* * If you add new checks here, you may need to add * additional checks to the "temporary" case in * snapshot_check() in dmu_objset.c. */ static int dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; char *htag = ha->htag; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int error = 0; if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) return (ENOTSUP); if (!dsl_dataset_is_snapshot(ds)) return (EINVAL); /* tags must be unique */ mutex_enter(&ds->ds_lock); if (ds->ds_phys->ds_userrefs_obj) { error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, 8, 1, tx); if (error == 0) error = EEXIST; else if (error == ENOENT) error = 0; } mutex_exit(&ds->ds_lock); if (error == 0 && ha->temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) error = E2BIG; return (error); } void dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; char *htag = ha->htag; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t now = gethrestime_sec(); uint64_t zapobj; mutex_enter(&ds->ds_lock); if (ds->ds_phys->ds_userrefs_obj == 0) { /* * This is the first user hold for this dataset. Create * the userrefs zap object. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); zapobj = ds->ds_phys->ds_userrefs_obj = zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); } else { zapobj = ds->ds_phys->ds_userrefs_obj; } ds->ds_userrefs++; mutex_exit(&ds->ds_lock); VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); if (ha->temphold) { VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, htag, &now, tx)); } spa_history_log_internal(LOG_DS_USER_HOLD, dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, (int)ha->temphold, ds->ds_object); } static int dsl_dataset_user_hold_one(const char *dsname, void *arg) { struct dsl_ds_holdarg *ha = arg; dsl_dataset_t *ds; int error; char *name; /* alloc a buffer to hold dsname@snapname plus terminating NULL */ name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = dsl_dataset_hold(name, ha->dstg, &ds); strfree(name); if (error == 0) { ha->gotone = B_TRUE; dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, ds, ha, 0); } else if (error == ENOENT && ha->recursive) { error = 0; } else { (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); } return (error); } int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, boolean_t temphold) { struct dsl_ds_holdarg *ha; int error; ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); ha->htag = htag; ha->temphold = temphold; error = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, ds, ha, 0); kmem_free(ha, sizeof (struct dsl_ds_holdarg)); return (error); } int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, boolean_t recursive, boolean_t temphold, int cleanup_fd) { struct dsl_ds_holdarg *ha; dsl_sync_task_t *dst; spa_t *spa; int error; minor_t minor = 0; if (cleanup_fd != -1) { /* Currently we only support cleanup-on-exit of tempholds. */ if (!temphold) return (EINVAL); error = zfs_onexit_fd_hold(cleanup_fd, &minor); if (error) return (error); } ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); error = spa_open(dsname, &spa, FTAG); if (error) { kmem_free(ha, sizeof (struct dsl_ds_holdarg)); if (cleanup_fd != -1) zfs_onexit_fd_rele(cleanup_fd); return (error); } ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ha->htag = htag; ha->snapname = snapname; ha->recursive = recursive; ha->temphold = temphold; if (recursive) { error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, ha, DS_FIND_CHILDREN); } else { error = dsl_dataset_user_hold_one(dsname, ha); } if (error == 0) error = dsl_sync_task_group_wait(ha->dstg); for (dst = list_head(&ha->dstg->dstg_tasks); dst; dst = list_next(&ha->dstg->dstg_tasks, dst)) { dsl_dataset_t *ds = dst->dst_arg1; if (dst->dst_err) { dsl_dataset_name(ds, ha->failed); *strchr(ha->failed, '@') = '\0'; } else if (error == 0 && minor != 0 && temphold) { /* * If this hold is to be released upon process exit, * register that action now. */ dsl_register_onexit_hold_cleanup(ds, htag, minor); } dsl_dataset_rele(ds, ha->dstg); } if (error == 0 && recursive && !ha->gotone) error = ENOENT; if (error) (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); dsl_sync_task_group_destroy(ha->dstg); kmem_free(ha, sizeof (struct dsl_ds_holdarg)); spa_close(spa, FTAG); if (cleanup_fd != -1) zfs_onexit_fd_rele(cleanup_fd); return (error); } struct dsl_ds_releasearg { dsl_dataset_t *ds; const char *htag; boolean_t own; /* do we own or just hold ds? */ }; static int dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, boolean_t *might_destroy) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t zapobj; uint64_t tmp; int error; *might_destroy = B_FALSE; mutex_enter(&ds->ds_lock); zapobj = ds->ds_phys->ds_userrefs_obj; if (zapobj == 0) { /* The tag can't possibly exist */ mutex_exit(&ds->ds_lock); return (ESRCH); } /* Make sure the tag exists */ error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); if (error) { mutex_exit(&ds->ds_lock); if (error == ENOENT) error = ESRCH; return (error); } if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)) *might_destroy = B_TRUE; mutex_exit(&ds->ds_lock); return (0); } static int dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) { struct dsl_ds_releasearg *ra = arg1; dsl_dataset_t *ds = ra->ds; boolean_t might_destroy; int error; if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) return (ENOTSUP); error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); if (error) return (error); if (might_destroy) { struct dsl_ds_destroyarg dsda = {0}; if (dmu_tx_is_syncing(tx)) { /* * If we're not prepared to remove the snapshot, * we can't allow the release to happen right now. */ if (!ra->own) return (EBUSY); } dsda.ds = ds; dsda.releasing = B_TRUE; return (dsl_dataset_destroy_check(&dsda, tag, tx)); } return (0); } static void dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) { struct dsl_ds_releasearg *ra = arg1; dsl_dataset_t *ds = ra->ds; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; uint64_t dsobj = ds->ds_object; uint64_t refs; int error; mutex_enter(&ds->ds_lock); ds->ds_userrefs--; refs = ds->ds_userrefs; mutex_exit(&ds->ds_lock); error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); VERIFY(error == 0 || error == ENOENT); zapobj = ds->ds_phys->ds_userrefs_obj; VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)) { struct dsl_ds_destroyarg dsda = {0}; ASSERT(ra->own); dsda.ds = ds; dsda.releasing = B_TRUE; /* We already did the destroy_check */ dsl_dataset_destroy_sync(&dsda, tag, tx); } spa_history_log_internal(LOG_DS_USER_RELEASE, dp->dp_spa, tx, "<%s> %lld dataset = %llu", ra->htag, (longlong_t)refs, dsobj); } static int dsl_dataset_user_release_one(const char *dsname, void *arg) { struct dsl_ds_holdarg *ha = arg; struct dsl_ds_releasearg *ra; dsl_dataset_t *ds; int error; void *dtag = ha->dstg; char *name; boolean_t own = B_FALSE; boolean_t might_destroy; /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = dsl_dataset_hold(name, dtag, &ds); strfree(name); if (error == ENOENT && ha->recursive) return (0); (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); if (error) return (error); ha->gotone = B_TRUE; ASSERT(dsl_dataset_is_snapshot(ds)); error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); if (error) { dsl_dataset_rele(ds, dtag); return (error); } if (might_destroy) { #ifdef _KERNEL name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = zfs_unmount_snap(name, NULL); strfree(name); if (error) { dsl_dataset_rele(ds, dtag); return (error); } #endif if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { dsl_dataset_rele(ds, dtag); return (EBUSY); } else { own = B_TRUE; dsl_dataset_make_exclusive(ds, dtag); } } ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); ra->ds = ds; ra->htag = ha->htag; ra->own = own; dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, dsl_dataset_user_release_sync, ra, dtag, 0); return (0); } int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, boolean_t recursive) { struct dsl_ds_holdarg *ha; dsl_sync_task_t *dst; spa_t *spa; int error; top: ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); error = spa_open(dsname, &spa, FTAG); if (error) { kmem_free(ha, sizeof (struct dsl_ds_holdarg)); return (error); } ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ha->htag = htag; ha->snapname = snapname; ha->recursive = recursive; if (recursive) { error = dmu_objset_find(dsname, dsl_dataset_user_release_one, ha, DS_FIND_CHILDREN); } else { error = dsl_dataset_user_release_one(dsname, ha); } if (error == 0) error = dsl_sync_task_group_wait(ha->dstg); for (dst = list_head(&ha->dstg->dstg_tasks); dst; dst = list_next(&ha->dstg->dstg_tasks, dst)) { struct dsl_ds_releasearg *ra = dst->dst_arg1; dsl_dataset_t *ds = ra->ds; if (dst->dst_err) dsl_dataset_name(ds, ha->failed); if (ra->own) dsl_dataset_disown(ds, ha->dstg); else dsl_dataset_rele(ds, ha->dstg); kmem_free(ra, sizeof (struct dsl_ds_releasearg)); } if (error == 0 && recursive && !ha->gotone) error = ENOENT; if (error && error != EBUSY) (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); dsl_sync_task_group_destroy(ha->dstg); kmem_free(ha, sizeof (struct dsl_ds_holdarg)); spa_close(spa, FTAG); /* * We can get EBUSY if we were racing with deferred destroy and * dsl_dataset_user_release_check() hadn't done the necessary * open context setup. We can also get EBUSY if we're racing * with destroy and that thread is the ds_owner. Either way * the busy condition should be transient, and we should retry * the release operation. */ if (error == EBUSY) goto top; return (error); } /* * Called at spa_load time (with retry == B_FALSE) to release a stale * temporary user hold. Also called by the onexit code (with retry == B_TRUE). */ int dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, boolean_t retry) { dsl_dataset_t *ds; char *snap; char *name; int namelen; int error; do { rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); if (error) return (error); namelen = dsl_dataset_namelen(ds)+1; name = kmem_alloc(namelen, KM_SLEEP); dsl_dataset_name(ds, name); dsl_dataset_rele(ds, FTAG); snap = strchr(name, '@'); *snap = '\0'; ++snap; error = dsl_dataset_user_release(name, snap, htag, B_FALSE); kmem_free(name, namelen); /* * The object can't have been destroyed because we have a hold, * but it might have been renamed, resulting in ENOENT. Retry * if we've been requested to do so. * * It would be nice if we could use the dsobj all the way * through and avoid ENOENT entirely. But we might need to * unmount the snapshot, and there's currently no way to lookup * a vfsp using a ZFS object id. */ } while ((error == ENOENT) && retry); return (error); } int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) { dsl_dataset_t *ds; int err; err = dsl_dataset_hold(dsname, FTAG, &ds); if (err) return (err); VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); if (ds->ds_phys->ds_userrefs_obj != 0) { zap_attribute_t *za; zap_cursor_t zc; za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_phys->ds_userrefs_obj); zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, za->za_first_integer)); } zap_cursor_fini(&zc); kmem_free(za, sizeof (zap_attribute_t)); } dsl_dataset_rele(ds, FTAG); return (0); } /* - * Note, this fuction is used as the callback for dmu_objset_find(). We + * Note, this function is used as the callback for dmu_objset_find(). We * always return 0 so that we will continue to find and process * inconsistent datasets, even if we encounter an error trying to * process one of them. */ /* ARGSUSED */ int dsl_destroy_inconsistent(const char *dsname, void *arg) { dsl_dataset_t *ds; if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { if (DS_IS_INCONSISTENT(ds)) (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); else dsl_dataset_disown(ds, FTAG); } return (0); } + +/* + * Return (in *usedp) the amount of space written in new that is not + * present in oldsnap. New may be a snapshot or the head. Old must be + * a snapshot before new, in new's filesystem (or its origin). If not then + * fail and return EINVAL. + * + * The written space is calculated by considering two components: First, we + * ignore any freed space, and calculate the written as new's used space + * minus old's used space. Next, we add in the amount of space that was freed + * between the two snapshots, thus reducing new's used space relative to old's. + * Specifically, this is the space that was born before old->ds_creation_txg, + * and freed before new (ie. on new's deadlist or a previous deadlist). + * + * space freed [---------------------] + * snapshots ---O-------O--------O-------O------ + * oldsnap new + */ +int +dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + int err = 0; + uint64_t snapobj; + dsl_pool_t *dp = new->ds_dir->dd_pool; + + *usedp = 0; + *usedp += new->ds_phys->ds_used_bytes; + *usedp -= oldsnap->ds_phys->ds_used_bytes; + + *compp = 0; + *compp += new->ds_phys->ds_compressed_bytes; + *compp -= oldsnap->ds_phys->ds_compressed_bytes; + + *uncompp = 0; + *uncompp += new->ds_phys->ds_uncompressed_bytes; + *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; + + rw_enter(&dp->dp_config_rwlock, RW_READER); + snapobj = new->ds_object; + while (snapobj != oldsnap->ds_object) { + dsl_dataset_t *snap; + uint64_t used, comp, uncomp; + + err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); + if (err != 0) + break; + + if (snap->ds_phys->ds_prev_snap_txg == + oldsnap->ds_phys->ds_creation_txg) { + /* + * The blocks in the deadlist can not be born after + * ds_prev_snap_txg, so get the whole deadlist space, + * which is more efficient (especially for old-format + * deadlists). Unfortunately the deadlist code + * doesn't have enough information to make this + * optimization itself. + */ + dsl_deadlist_space(&snap->ds_deadlist, + &used, &comp, &uncomp); + } else { + dsl_deadlist_space_range(&snap->ds_deadlist, + 0, oldsnap->ds_phys->ds_creation_txg, + &used, &comp, &uncomp); + } + *usedp += used; + *compp += comp; + *uncompp += uncomp; + + /* + * If we get to the beginning of the chain of snapshots + * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap + * was not a snapshot of/before new. + */ + snapobj = snap->ds_phys->ds_prev_snap_obj; + dsl_dataset_rele(snap, FTAG); + if (snapobj == 0) { + err = EINVAL; + break; + } + + } + rw_exit(&dp->dp_config_rwlock); + return (err); +} + +/* + * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, + * lastsnap, and all snapshots in between are deleted. + * + * blocks that would be freed [---------------------------] + * snapshots ---O-------O--------O-------O--------O + * firstsnap lastsnap + * + * This is the set of blocks that were born after the snap before firstsnap, + * (birth > firstsnap->prev_snap_txg) and died before the snap after the + * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). + * We calculate this by iterating over the relevant deadlists (from the snap + * after lastsnap, backward to the snap after firstsnap), summing up the + * space on the deadlist that was born after the snap before firstsnap. + */ +int +dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, + dsl_dataset_t *lastsnap, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + int err = 0; + uint64_t snapobj; + dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; + + ASSERT(dsl_dataset_is_snapshot(firstsnap)); + ASSERT(dsl_dataset_is_snapshot(lastsnap)); + + /* + * Check that the snapshots are in the same dsl_dir, and firstsnap + * is before lastsnap. + */ + if (firstsnap->ds_dir != lastsnap->ds_dir || + firstsnap->ds_phys->ds_creation_txg > + lastsnap->ds_phys->ds_creation_txg) + return (EINVAL); + + *usedp = *compp = *uncompp = 0; + + rw_enter(&dp->dp_config_rwlock, RW_READER); + snapobj = lastsnap->ds_phys->ds_next_snap_obj; + while (snapobj != firstsnap->ds_object) { + dsl_dataset_t *ds; + uint64_t used, comp, uncomp; + + err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); + if (err != 0) + break; + + dsl_deadlist_space_range(&ds->ds_deadlist, + firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &used, &comp, &uncomp); + *usedp += used; + *compp += comp; + *uncompp += uncomp; + + snapobj = ds->ds_phys->ds_prev_snap_obj; + ASSERT3U(snapobj, !=, 0); + dsl_dataset_rele(ds, FTAG); + } + rw_exit(&dp->dp_config_rwlock); + return (err); +} + #if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dmu_snapshots_destroy_nvl); EXPORT_SYMBOL(dsl_dataset_hold); EXPORT_SYMBOL(dsl_dataset_hold_obj); EXPORT_SYMBOL(dsl_dataset_own); EXPORT_SYMBOL(dsl_dataset_own_obj); EXPORT_SYMBOL(dsl_dataset_name); EXPORT_SYMBOL(dsl_dataset_rele); EXPORT_SYMBOL(dsl_dataset_disown); EXPORT_SYMBOL(dsl_dataset_drop_ref); EXPORT_SYMBOL(dsl_dataset_tryown); EXPORT_SYMBOL(dsl_dataset_make_exclusive); EXPORT_SYMBOL(dsl_dataset_create_sync); EXPORT_SYMBOL(dsl_dataset_create_sync_dd); EXPORT_SYMBOL(dsl_dataset_destroy); -EXPORT_SYMBOL(dsl_snapshots_destroy); EXPORT_SYMBOL(dsl_dataset_destroy_check); EXPORT_SYMBOL(dsl_dataset_destroy_sync); EXPORT_SYMBOL(dsl_dataset_snapshot_check); EXPORT_SYMBOL(dsl_dataset_snapshot_sync); EXPORT_SYMBOL(dsl_dataset_rename); EXPORT_SYMBOL(dsl_dataset_promote); EXPORT_SYMBOL(dsl_dataset_clone_swap); EXPORT_SYMBOL(dsl_dataset_user_hold); EXPORT_SYMBOL(dsl_dataset_user_release); EXPORT_SYMBOL(dsl_dataset_user_release_tmp); EXPORT_SYMBOL(dsl_dataset_get_holds); EXPORT_SYMBOL(dsl_dataset_get_blkptr); EXPORT_SYMBOL(dsl_dataset_set_blkptr); EXPORT_SYMBOL(dsl_dataset_get_spa); EXPORT_SYMBOL(dsl_dataset_modified_since_lastsnap); +EXPORT_SYMBOL(dsl_dataset_space_written); +EXPORT_SYMBOL(dsl_dataset_space_wouldfree); EXPORT_SYMBOL(dsl_dataset_sync); EXPORT_SYMBOL(dsl_dataset_block_born); EXPORT_SYMBOL(dsl_dataset_block_kill); EXPORT_SYMBOL(dsl_dataset_block_freeable); EXPORT_SYMBOL(dsl_dataset_prev_snap_txg); EXPORT_SYMBOL(dsl_dataset_dirty); EXPORT_SYMBOL(dsl_dataset_stats); EXPORT_SYMBOL(dsl_dataset_fast_stat); EXPORT_SYMBOL(dsl_dataset_space); EXPORT_SYMBOL(dsl_dataset_fsid_guid); EXPORT_SYMBOL(dsl_dsobj_to_dsname); EXPORT_SYMBOL(dsl_dataset_check_quota); EXPORT_SYMBOL(dsl_dataset_set_quota); EXPORT_SYMBOL(dsl_dataset_set_quota_sync); EXPORT_SYMBOL(dsl_dataset_set_reservation); EXPORT_SYMBOL(dsl_destroy_inconsistent); #endif diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c index 064f8aceb8ee..dd6db2120b31 100644 --- a/module/zfs/dsl_deadlist.c +++ b/module/zfs/dsl_deadlist.c @@ -1,474 +1,498 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include +/* + * Deadlist concurrency: + * + * Deadlists can only be modified from the syncing thread. + * + * Except for dsl_deadlist_insert(), it can only be modified with the + * dp_config_rwlock held with RW_WRITER. + * + * The accessors (dsl_deadlist_space() and dsl_deadlist_space_range()) can + * be called concurrently, from open context, with the dl_config_rwlock held + * with RW_READER. + * + * Therefore, we only need to provide locking between dsl_deadlist_insert() and + * the accessors, protecting: + * dl_phys->dl_used,comp,uncomp + * and protecting the dl_tree from being loaded. + * The locking is provided by dl_lock. Note that locking on the bpobj_t + * provides its own locking, and dl_oldfmt is immutable. + */ + static int dsl_deadlist_compare(const void *arg1, const void *arg2) { const dsl_deadlist_entry_t *dle1 = arg1; const dsl_deadlist_entry_t *dle2 = arg2; if (dle1->dle_mintxg < dle2->dle_mintxg) return (-1); else if (dle1->dle_mintxg > dle2->dle_mintxg) return (+1); else return (0); } static void dsl_deadlist_load_tree(dsl_deadlist_t *dl) { zap_cursor_t zc; zap_attribute_t za; ASSERT(!dl->dl_oldfmt); if (dl->dl_havetree) return; avl_create(&dl->dl_tree, dsl_deadlist_compare, sizeof (dsl_deadlist_entry_t), offsetof(dsl_deadlist_entry_t, dle_node)); for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP); dle->dle_mintxg = strtonum(za.za_name, NULL); VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, za.za_first_integer)); avl_add(&dl->dl_tree, dle); } zap_cursor_fini(&zc); dl->dl_havetree = B_TRUE; } void dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object) { dmu_object_info_t doi; mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL); dl->dl_os = os; dl->dl_object = object; VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, &dl->dl_dbuf)); dmu_object_info_from_db(dl->dl_dbuf, &doi); if (doi.doi_type == DMU_OT_BPOBJ) { dmu_buf_rele(dl->dl_dbuf, dl); dl->dl_dbuf = NULL; dl->dl_oldfmt = B_TRUE; VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object)); return; } dl->dl_oldfmt = B_FALSE; dl->dl_phys = dl->dl_dbuf->db_data; dl->dl_havetree = B_FALSE; } void dsl_deadlist_close(dsl_deadlist_t *dl) { void *cookie = NULL; dsl_deadlist_entry_t *dle; if (dl->dl_oldfmt) { dl->dl_oldfmt = B_FALSE; bpobj_close(&dl->dl_bpobj); return; } if (dl->dl_havetree) { while ((dle = avl_destroy_nodes(&dl->dl_tree, &cookie)) != NULL) { bpobj_close(&dle->dle_bpobj); kmem_free(dle, sizeof (*dle)); } avl_destroy(&dl->dl_tree); } dmu_buf_rele(dl->dl_dbuf, dl); mutex_destroy(&dl->dl_lock); dl->dl_dbuf = NULL; dl->dl_phys = NULL; } uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx) { if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) return (bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx)); return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR, sizeof (dsl_deadlist_phys_t), tx)); } void dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx) { dmu_object_info_t doi; zap_cursor_t zc; zap_attribute_t za; VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi)); if (doi.doi_type == DMU_OT_BPOBJ) { bpobj_free(os, dlobj, tx); return; } for (zap_cursor_init(&zc, os, dlobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) bpobj_free(os, za.za_first_integer, tx); zap_cursor_fini(&zc); VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx)); } void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx) { dsl_deadlist_entry_t dle_tofind; dsl_deadlist_entry_t *dle; avl_index_t where; if (dl->dl_oldfmt) { bpobj_enqueue(&dl->dl_bpobj, bp, tx); return; } dsl_deadlist_load_tree(dl); dmu_buf_will_dirty(dl->dl_dbuf, tx); mutex_enter(&dl->dl_lock); dl->dl_phys->dl_used += bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp); dl->dl_phys->dl_comp += BP_GET_PSIZE(bp); dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp); mutex_exit(&dl->dl_lock); dle_tofind.dle_mintxg = bp->blk_birth; dle = avl_find(&dl->dl_tree, &dle_tofind, &where); if (dle == NULL) dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); else dle = AVL_PREV(&dl->dl_tree, dle); bpobj_enqueue(&dle->dle_bpobj, bp, tx); } /* * Insert new key in deadlist, which must be > all current entries. * mintxg is not inclusive. */ void dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) { uint64_t obj; dsl_deadlist_entry_t *dle; if (dl->dl_oldfmt) return; dsl_deadlist_load_tree(dl); dle = kmem_alloc(sizeof (*dle), KM_SLEEP); dle->dle_mintxg = mintxg; obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj)); avl_add(&dl->dl_tree, dle); VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object, mintxg, obj, tx)); } /* * Remove this key, merging its entries into the previous key. */ void dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) { dsl_deadlist_entry_t dle_tofind; dsl_deadlist_entry_t *dle, *dle_prev; if (dl->dl_oldfmt) return; dsl_deadlist_load_tree(dl); dle_tofind.dle_mintxg = mintxg; dle = avl_find(&dl->dl_tree, &dle_tofind, NULL); dle_prev = AVL_PREV(&dl->dl_tree, dle); bpobj_enqueue_subobj(&dle_prev->dle_bpobj, dle->dle_bpobj.bpo_object, tx); avl_remove(&dl->dl_tree, dle); bpobj_close(&dle->dle_bpobj); kmem_free(dle, sizeof (*dle)); VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx)); } /* * Walk ds's snapshots to regenerate generate ZAP & AVL. */ static void dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj, uint64_t mrs_obj, dmu_tx_t *tx) { dsl_deadlist_t dl; dsl_pool_t *dp = dmu_objset_pool(os); dsl_deadlist_open(&dl, os, dlobj); if (dl.dl_oldfmt) { dsl_deadlist_close(&dl); return; } while (mrs_obj != 0) { dsl_dataset_t *ds; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds)); dsl_deadlist_add_key(&dl, ds->ds_phys->ds_prev_snap_txg, tx); mrs_obj = ds->ds_phys->ds_prev_snap_obj; dsl_dataset_rele(ds, FTAG); } dsl_deadlist_close(&dl); } uint64_t dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg, uint64_t mrs_obj, dmu_tx_t *tx) { dsl_deadlist_entry_t *dle; uint64_t newobj; newobj = dsl_deadlist_alloc(dl->dl_os, tx); if (dl->dl_oldfmt) { dsl_deadlist_regenerate(dl->dl_os, newobj, mrs_obj, tx); return (newobj); } dsl_deadlist_load_tree(dl); for (dle = avl_first(&dl->dl_tree); dle; dle = AVL_NEXT(&dl->dl_tree, dle)) { uint64_t obj; if (dle->dle_mintxg >= maxtxg) break; obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj, dle->dle_mintxg, obj, tx)); } return (newobj); } void dsl_deadlist_space(dsl_deadlist_t *dl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { if (dl->dl_oldfmt) { VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj, usedp, compp, uncompp)); return; } mutex_enter(&dl->dl_lock); *usedp = dl->dl_phys->dl_used; *compp = dl->dl_phys->dl_comp; *uncompp = dl->dl_phys->dl_uncomp; mutex_exit(&dl->dl_lock); } /* * return space used in the range (mintxg, maxtxg]. * Includes maxtxg, does not include mintxg. * mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is - * UINT64_MAX). + * larger than any bp in the deadlist (eg. UINT64_MAX)). */ void dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { - dsl_deadlist_entry_t dle_tofind; dsl_deadlist_entry_t *dle; + dsl_deadlist_entry_t dle_tofind; avl_index_t where; if (dl->dl_oldfmt) { VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj, mintxg, maxtxg, usedp, compp, uncompp)); return; } - dsl_deadlist_load_tree(dl); *usedp = *compp = *uncompp = 0; + mutex_enter(&dl->dl_lock); + dsl_deadlist_load_tree(dl); dle_tofind.dle_mintxg = mintxg; dle = avl_find(&dl->dl_tree, &dle_tofind, &where); /* * If we don't find this mintxg, there shouldn't be anything * after it either. */ ASSERT(dle != NULL || avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL); + for (; dle && dle->dle_mintxg < maxtxg; dle = AVL_NEXT(&dl->dl_tree, dle)) { uint64_t used, comp, uncomp; VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, &used, &comp, &uncomp)); *usedp += used; *compp += comp; *uncompp += uncomp; } + mutex_exit(&dl->dl_lock); } static void dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth, dmu_tx_t *tx) { dsl_deadlist_entry_t dle_tofind; dsl_deadlist_entry_t *dle; avl_index_t where; uint64_t used, comp, uncomp; bpobj_t bpo; VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp)); bpobj_close(&bpo); dsl_deadlist_load_tree(dl); dmu_buf_will_dirty(dl->dl_dbuf, tx); mutex_enter(&dl->dl_lock); dl->dl_phys->dl_used += used; dl->dl_phys->dl_comp += comp; dl->dl_phys->dl_uncomp += uncomp; mutex_exit(&dl->dl_lock); dle_tofind.dle_mintxg = birth; dle = avl_find(&dl->dl_tree, &dle_tofind, &where); if (dle == NULL) dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx); } static int dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_deadlist_t *dl = arg; dsl_deadlist_insert(dl, bp, tx); return (0); } /* * Merge the deadlist pointed to by 'obj' into dl. obj will be left as * an empty deadlist. */ void dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) { zap_cursor_t zc; zap_attribute_t za; dmu_buf_t *bonus; dsl_deadlist_phys_t *dlp; dmu_object_info_t doi; VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi)); if (doi.doi_type == DMU_OT_BPOBJ) { bpobj_t bpo; VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); VERIFY3U(0, ==, bpobj_iterate(&bpo, dsl_deadlist_insert_cb, dl, tx)); bpobj_close(&bpo); return; } for (zap_cursor_init(&zc, dl->dl_os, obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t mintxg = strtonum(za.za_name, NULL); dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx); VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx)); } zap_cursor_fini(&zc); VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus)); dlp = bonus->db_data; dmu_buf_will_dirty(bonus, tx); bzero(dlp, sizeof (*dlp)); dmu_buf_rele(bonus, FTAG); } /* * Remove entries on dl that are >= mintxg, and put them on the bpobj. */ void dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, dmu_tx_t *tx) { dsl_deadlist_entry_t dle_tofind; dsl_deadlist_entry_t *dle; avl_index_t where; ASSERT(!dl->dl_oldfmt); dmu_buf_will_dirty(dl->dl_dbuf, tx); dsl_deadlist_load_tree(dl); dle_tofind.dle_mintxg = mintxg; dle = avl_find(&dl->dl_tree, &dle_tofind, &where); if (dle == NULL) dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER); while (dle) { uint64_t used, comp, uncomp; dsl_deadlist_entry_t *dle_next; bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx); VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, &used, &comp, &uncomp)); mutex_enter(&dl->dl_lock); ASSERT3U(dl->dl_phys->dl_used, >=, used); ASSERT3U(dl->dl_phys->dl_comp, >=, comp); ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp); dl->dl_phys->dl_used -= used; dl->dl_phys->dl_comp -= comp; dl->dl_phys->dl_uncomp -= uncomp; mutex_exit(&dl->dl_lock); VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, dle->dle_mintxg, tx)); dle_next = AVL_NEXT(&dl->dl_tree, dle); avl_remove(&dl->dl_tree, dle); bpobj_close(&dle->dle_bpobj); kmem_free(dle, sizeof (*dle)); dle = dle_next; } } diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 6b5c8424a687..a4d4e42da27d 100644 --- a/module/zfs/dsl_deleg.c +++ b/module/zfs/dsl_deleg.c @@ -1,760 +1,763 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* * DSL permissions are stored in a two level zap attribute * mechanism. The first level identifies the "class" of * entry. The class is identified by the first 2 letters of * the attribute. The second letter "l" or "d" identifies whether * it is a local or descendent permission. The first letter * identifies the type of entry. * * ul$ identifies permissions granted locally for this userid. * ud$ identifies permissions granted on descendent datasets for * this userid. * Ul$ identifies permission sets granted locally for this userid. * Ud$ identifies permission sets granted on descendent datasets for * this userid. * gl$ identifies permissions granted locally for this groupid. * gd$ identifies permissions granted on descendent datasets for * this groupid. * Gl$ identifies permission sets granted locally for this groupid. * Gd$ identifies permission sets granted on descendent datasets for * this groupid. * el$ identifies permissions granted locally for everyone. * ed$ identifies permissions granted on descendent datasets * for everyone. * El$ identifies permission sets granted locally for everyone. * Ed$ identifies permission sets granted to descendent datasets for * everyone. * c-$ identifies permission to create at dataset creation time. * C-$ identifies permission sets to grant locally at dataset creation * time. * s-$@ permissions defined in specified set @ * S-$@ Sets defined in named set @ * * Each of the above entities points to another zap attribute that contains one * attribute for each allowed permission, such as create, destroy,... * All of the "upper" case class types will specify permission set names * rather than permissions. * * Basically it looks something like this: * ul$12 -> ZAP OBJ -> permissions... * * The ZAP OBJ is referred to as the jump object. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_deleg.h" /* * Validate that user is allowed to delegate specified permissions. * * In order to delegate "create" you must have "create" * and "allow". */ int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr) { nvpair_t *whopair = NULL; int error; if ((error = dsl_deleg_access(ddname, ZFS_DELEG_PERM_ALLOW, cr)) != 0) return (error); while ((whopair = nvlist_next_nvpair(nvp, whopair))) { nvlist_t *perms; nvpair_t *permpair = NULL; VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); while ((permpair = nvlist_next_nvpair(perms, permpair))) { const char *perm = nvpair_name(permpair); if (strcmp(perm, ZFS_DELEG_PERM_ALLOW) == 0) return (EPERM); if ((error = dsl_deleg_access(ddname, perm, cr)) != 0) return (error); } } return (0); } /* * Validate that user is allowed to unallow specified permissions. They * must have the 'allow' permission, and even then can only unallow * perms for their uid. */ int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr) { nvpair_t *whopair = NULL; int error; char idstr[32]; if ((error = dsl_deleg_access(ddname, ZFS_DELEG_PERM_ALLOW, cr)) != 0) return (error); (void) snprintf(idstr, sizeof (idstr), "%lld", (longlong_t)crgetuid(cr)); while ((whopair = nvlist_next_nvpair(nvp, whopair))) { zfs_deleg_who_type_t type = nvpair_name(whopair)[0]; if (type != ZFS_DELEG_USER && type != ZFS_DELEG_USER_SETS) return (EPERM); if (strcmp(idstr, &nvpair_name(whopair)[3]) != 0) return (EPERM); } return (0); } static void dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; nvlist_t *nvp = arg2; objset_t *mos = dd->dd_pool->dp_meta_objset; nvpair_t *whopair = NULL; uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; if (zapobj == 0) { dmu_buf_will_dirty(dd->dd_dbuf, tx); zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); } while ((whopair = nvlist_next_nvpair(nvp, whopair))) { const char *whokey = nvpair_name(whopair); nvlist_t *perms; nvpair_t *permpair = NULL; uint64_t jumpobj; VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) { jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); VERIFY(zap_update(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0); } while ((permpair = nvlist_next_nvpair(perms, permpair))) { const char *perm = nvpair_name(permpair); uint64_t n = 0; VERIFY(zap_update(mos, jumpobj, perm, 8, 1, &n, tx) == 0); spa_history_log_internal(LOG_DS_PERM_UPDATE, dd->dd_pool->dp_spa, tx, "%s %s dataset = %llu", whokey, perm, dd->dd_phys->dd_head_dataset_obj); } } } static void dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; nvlist_t *nvp = arg2; objset_t *mos = dd->dd_pool->dp_meta_objset; nvpair_t *whopair = NULL; uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; if (zapobj == 0) return; while ((whopair = nvlist_next_nvpair(nvp, whopair))) { const char *whokey = nvpair_name(whopair); nvlist_t *perms; nvpair_t *permpair = NULL; uint64_t jumpobj; if (nvpair_value_nvlist(whopair, &perms) != 0) { if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == 0) { (void) zap_remove(mos, zapobj, whokey, tx); VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, dd->dd_pool->dp_spa, tx, "%s dataset = %llu", whokey, dd->dd_phys->dd_head_dataset_obj); continue; } if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) continue; while ((permpair = nvlist_next_nvpair(perms, permpair))) { const char *perm = nvpair_name(permpair); uint64_t n = 0; (void) zap_remove(mos, jumpobj, perm, tx); if (zap_count(mos, jumpobj, &n) == 0 && n == 0) { (void) zap_remove(mos, zapobj, whokey, tx); VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } spa_history_log_internal(LOG_DS_PERM_REMOVE, dd->dd_pool->dp_spa, tx, "%s %s dataset = %llu", whokey, perm, dd->dd_phys->dd_head_dataset_obj); } } } int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) { dsl_dir_t *dd; int error; nvpair_t *whopair = NULL; int blocks_modified = 0; error = dsl_dir_open(ddname, FTAG, &dd, NULL); if (error) return (error); if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) < SPA_VERSION_DELEGATED_PERMS) { dsl_dir_close(dd, FTAG); return (ENOTSUP); } while ((whopair = nvlist_next_nvpair(nvp, whopair))) blocks_modified++; error = dsl_sync_task_do(dd->dd_pool, NULL, unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync, dd, nvp, blocks_modified); dsl_dir_close(dd, FTAG); return (error); } /* * Find all 'allow' permissions from a given point and then continue * traversing up to the root. * * This function constructs an nvlist of nvlists. * each setpoint is an nvlist composed of an nvlist of an nvlist * of the individual * users/groups/everyone/create * permissions. * * The nvlist will look like this. * * { source fsname -> { whokeys { permissions,...}, ...}} * * The fsname nvpairs will be arranged in a bottom up order. For example, * if we have the following structure a/b/c then the nvpairs for the fsnames * will be ordered a/b/c, a/b, a. */ int dsl_deleg_get(const char *ddname, nvlist_t **nvp) { dsl_dir_t *dd, *startdd; dsl_pool_t *dp; int error; objset_t *mos; zap_cursor_t *basezc, *zc; zap_attribute_t *baseza, *za; char *source; error = dsl_dir_open(ddname, FTAG, &startdd, NULL); if (error) return (error); dp = startdd->dd_pool; mos = dp->dp_meta_objset; zc = kmem_alloc(sizeof(zap_cursor_t), KM_SLEEP); za = kmem_alloc(sizeof(zap_attribute_t), KM_SLEEP); basezc = kmem_alloc(sizeof(zap_cursor_t), KM_SLEEP); baseza = kmem_alloc(sizeof(zap_attribute_t), KM_SLEEP); source = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, KM_SLEEP); VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); rw_enter(&dp->dp_config_rwlock, RW_READER); for (dd = startdd; dd != NULL; dd = dd->dd_parent) { nvlist_t *sp_nvp; uint64_t n; if (dd->dd_phys->dd_deleg_zapobj && (zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) == 0) && n) { VERIFY(nvlist_alloc(&sp_nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); } else { continue; } for (zap_cursor_init(basezc, mos, dd->dd_phys->dd_deleg_zapobj); zap_cursor_retrieve(basezc, baseza) == 0; zap_cursor_advance(basezc)) { nvlist_t *perms_nvp; ASSERT(baseza->za_integer_length == 8); ASSERT(baseza->za_num_integers == 1); VERIFY(nvlist_alloc(&perms_nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (zap_cursor_init(zc, mos, baseza->za_first_integer); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { VERIFY(nvlist_add_boolean(perms_nvp, za->za_name) == 0); } zap_cursor_fini(zc); VERIFY(nvlist_add_nvlist(sp_nvp, baseza->za_name, perms_nvp) == 0); nvlist_free(perms_nvp); } zap_cursor_fini(basezc); dsl_dir_name(dd, source); VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0); nvlist_free(sp_nvp); } rw_exit(&dp->dp_config_rwlock); kmem_free(source, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); kmem_free(baseza, sizeof(zap_attribute_t)); kmem_free(basezc, sizeof(zap_cursor_t)); kmem_free(za, sizeof(zap_attribute_t)); kmem_free(zc, sizeof(zap_cursor_t)); dsl_dir_close(startdd, FTAG); return (0); } /* * Routines for dsl_deleg_access() -- access checking. */ typedef struct perm_set { avl_node_t p_node; boolean_t p_matched; char p_setname[ZFS_MAX_DELEG_NAME]; } perm_set_t; static int perm_set_compare(const void *arg1, const void *arg2) { const perm_set_t *node1 = arg1; const perm_set_t *node2 = arg2; int val; val = strcmp(node1->p_setname, node2->p_setname); if (val == 0) return (0); return (val > 0 ? 1 : -1); } /* * Determine whether a specified permission exists. * * First the base attribute has to be retrieved. i.e. ul$12 * Once the base object has been retrieved the actual permission * is lookup up in the zap object the base object points to. * * Return 0 if permission exists, ENOENT if there is no whokey, EPERM if * there is no perm in that jumpobj. */ static int dsl_check_access(objset_t *mos, uint64_t zapobj, char type, char checkflag, void *valp, const char *perm) { int error; uint64_t jumpobj, zero; char whokey[ZFS_MAX_DELEG_NAME]; zfs_deleg_whokey(whokey, type, checkflag, valp); error = zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj); if (error == 0) { error = zap_lookup(mos, jumpobj, perm, 8, 1, &zero); if (error == ENOENT) error = EPERM; } return (error); } /* * check a specified user/group for a requested permission */ static int dsl_check_user_access(objset_t *mos, uint64_t zapobj, const char *perm, int checkflag, cred_t *cr) { const gid_t *gids; int ngids; int i; uint64_t id; /* check for user */ id = crgetuid(cr); if (dsl_check_access(mos, zapobj, ZFS_DELEG_USER, checkflag, &id, perm) == 0) return (0); /* check for users primary group */ id = crgetgid(cr); if (dsl_check_access(mos, zapobj, ZFS_DELEG_GROUP, checkflag, &id, perm) == 0) return (0); /* check for everyone entry */ id = -1; if (dsl_check_access(mos, zapobj, ZFS_DELEG_EVERYONE, checkflag, &id, perm) == 0) return (0); /* check each supplemental group user is a member of */ ngids = crgetngroups(cr); gids = crgetgroups(cr); for (i = 0; i != ngids; i++) { id = gids[i]; if (dsl_check_access(mos, zapobj, ZFS_DELEG_GROUP, checkflag, &id, perm) == 0) return (0); } return (EPERM); } /* * Iterate over the sets specified in the specified zapobj * and load them into the permsets avl tree. */ static int dsl_load_sets(objset_t *mos, uint64_t zapobj, char type, char checkflag, void *valp, avl_tree_t *avl) { zap_cursor_t zc; zap_attribute_t za; perm_set_t *permnode; avl_index_t idx; uint64_t jumpobj; int error; char whokey[ZFS_MAX_DELEG_NAME]; zfs_deleg_whokey(whokey, type, checkflag, valp); error = zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj); if (error != 0) return (error); for (zap_cursor_init(&zc, mos, jumpobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { permnode = kmem_alloc(sizeof (perm_set_t), KM_SLEEP); (void) strlcpy(permnode->p_setname, za.za_name, sizeof (permnode->p_setname)); permnode->p_matched = B_FALSE; if (avl_find(avl, permnode, &idx) == NULL) { avl_insert(avl, permnode, idx); } else { kmem_free(permnode, sizeof (perm_set_t)); } } zap_cursor_fini(&zc); return (0); } /* * Load all permissions user based on cred belongs to. */ static void dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl, char checkflag, cred_t *cr) { const gid_t *gids; int ngids, i; uint64_t id; id = crgetuid(cr); (void) dsl_load_sets(mos, zapobj, ZFS_DELEG_USER_SETS, checkflag, &id, avl); id = crgetgid(cr); (void) dsl_load_sets(mos, zapobj, ZFS_DELEG_GROUP_SETS, checkflag, &id, avl); (void) dsl_load_sets(mos, zapobj, ZFS_DELEG_EVERYONE_SETS, checkflag, NULL, avl); ngids = crgetngroups(cr); gids = crgetgroups(cr); for (i = 0; i != ngids; i++) { id = gids[i]; (void) dsl_load_sets(mos, zapobj, ZFS_DELEG_GROUP_SETS, checkflag, &id, avl); } } /* - * Check if user has requested permission. + * Check if user has requested permission. If descendent is set, must have + * descendent perms. */ int -dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) +dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, + cred_t *cr) { dsl_dir_t *dd; dsl_pool_t *dp; void *cookie; int error; char checkflag; objset_t *mos; avl_tree_t permsets; perm_set_t *setnode; dp = ds->ds_dir->dd_pool; mos = dp->dp_meta_objset; if (dsl_delegation_on(mos) == B_FALSE) return (ECANCELED); if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) < SPA_VERSION_DELEGATED_PERMS) return (EPERM); - if (dsl_dataset_is_snapshot(ds)) { + if (dsl_dataset_is_snapshot(ds) || descendent) { /* * Snapshots are treated as descendents only, * local permissions do not apply. */ checkflag = ZFS_DELEG_DESCENDENT; } else { checkflag = ZFS_DELEG_LOCAL; } avl_create(&permsets, perm_set_compare, sizeof (perm_set_t), offsetof(perm_set_t, p_node)); rw_enter(&dp->dp_config_rwlock, RW_READER); for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent, checkflag = ZFS_DELEG_DESCENDENT) { uint64_t zapobj; boolean_t expanded; /* * If not in global zone then make sure * the zoned property is set */ if (!INGLOBALZONE(curproc)) { uint64_t zoned; if (dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_ZONED), 8, 1, &zoned, NULL, B_FALSE) != 0) break; if (!zoned) break; } zapobj = dd->dd_phys->dd_deleg_zapobj; if (zapobj == 0) continue; dsl_load_user_sets(mos, zapobj, &permsets, checkflag, cr); again: expanded = B_FALSE; for (setnode = avl_first(&permsets); setnode; setnode = AVL_NEXT(&permsets, setnode)) { if (setnode->p_matched == B_TRUE) continue; /* See if this set directly grants this permission */ error = dsl_check_access(mos, zapobj, ZFS_DELEG_NAMED_SET, 0, setnode->p_setname, perm); if (error == 0) goto success; if (error == EPERM) setnode->p_matched = B_TRUE; /* See if this set includes other sets */ error = dsl_load_sets(mos, zapobj, ZFS_DELEG_NAMED_SET_SETS, 0, setnode->p_setname, &permsets); if (error == 0) setnode->p_matched = expanded = B_TRUE; } /* * If we expanded any sets, that will define more sets, * which we need to check. */ if (expanded) goto again; error = dsl_check_user_access(mos, zapobj, perm, checkflag, cr); if (error == 0) goto success; } error = EPERM; success: rw_exit(&dp->dp_config_rwlock); cookie = NULL; while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL) kmem_free(setnode, sizeof (perm_set_t)); return (error); } int dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) { dsl_dataset_t *ds; int error; error = dsl_dataset_hold(dsname, FTAG, &ds); if (error) return (error); - error = dsl_deleg_access_impl(ds, perm, cr); + error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); dsl_dataset_rele(ds, FTAG); return (error); } /* * Other routines. */ static void copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj, boolean_t dosets, uint64_t uid, dmu_tx_t *tx) { objset_t *mos = dd->dd_pool->dp_meta_objset; uint64_t jumpobj, pjumpobj; uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; zap_cursor_t zc; zap_attribute_t za; char whokey[ZFS_MAX_DELEG_NAME]; zfs_deleg_whokey(whokey, dosets ? ZFS_DELEG_CREATE_SETS : ZFS_DELEG_CREATE, ZFS_DELEG_LOCAL, NULL); if (zap_lookup(mos, pzapobj, whokey, 8, 1, &pjumpobj) != 0) return; if (zapobj == 0) { dmu_buf_will_dirty(dd->dd_dbuf, tx); zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); } zfs_deleg_whokey(whokey, dosets ? ZFS_DELEG_USER_SETS : ZFS_DELEG_USER, ZFS_DELEG_LOCAL, &uid); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == ENOENT) { jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0); } for (zap_cursor_init(&zc, mos, pjumpobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t zero = 0; ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); VERIFY(zap_update(mos, jumpobj, za.za_name, 8, 1, &zero, tx) == 0); } zap_cursor_fini(&zc); } /* * set all create time permission on new dataset. */ void dsl_deleg_set_create_perms(dsl_dir_t *sdd, dmu_tx_t *tx, cred_t *cr) { dsl_dir_t *dd; uint64_t uid = crgetuid(cr); if (spa_version(dmu_objset_spa(sdd->dd_pool->dp_meta_objset)) < SPA_VERSION_DELEGATED_PERMS) return; for (dd = sdd->dd_parent; dd != NULL; dd = dd->dd_parent) { uint64_t pzapobj = dd->dd_phys->dd_deleg_zapobj; if (pzapobj == 0) continue; copy_create_perms(sdd, pzapobj, B_FALSE, uid, tx); copy_create_perms(sdd, pzapobj, B_TRUE, uid, tx); } } int dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx) { zap_cursor_t zc; zap_attribute_t za; if (zapobj == 0) return (0); for (zap_cursor_init(&zc, mos, zapobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); VERIFY(0 == zap_destroy(mos, za.za_first_integer, tx)); } zap_cursor_fini(&zc); VERIFY(0 == zap_destroy(mos, zapobj, tx)); return (0); } boolean_t dsl_delegation_on(objset_t *os) { return (!!spa_delegation(os->os_spa)); } #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(dsl_deleg_get); EXPORT_SYMBOL(dsl_deleg_set); #endif diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index d428b7ad7398..3b285df650dd 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -1,874 +1,878 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int zfs_no_write_throttle = 0; int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ int zfs_txg_synctime_ms = 1000; /* target millisecs to sync a txg */ unsigned long zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */ unsigned long zfs_write_limit_max = 0; /* max data payload per txg */ unsigned long zfs_write_limit_inflated = 0; unsigned long zfs_write_limit_override = 0; kmutex_t zfs_write_limit_lock; static pgcnt_t old_physmem = 0; int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) { uint64_t obj; int err; err = zap_lookup(dp->dp_meta_objset, dp->dp_root_dir->dd_phys->dd_child_dir_zapobj, name, sizeof (obj), 1, &obj); if (err) return (err); return (dsl_dir_open_obj(dp, obj, name, dp, ddp)); } static dsl_pool_t * dsl_pool_open_impl(spa_t *spa, uint64_t txg) { dsl_pool_t *dp; blkptr_t *bp = spa_get_rootblkptr(spa); dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); dp->dp_spa = spa; dp->dp_meta_rootbp = *bp; rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL); dp->dp_write_limit = zfs_write_limit_min; txg_init(dp, txg); txg_list_create(&dp->dp_dirty_datasets, offsetof(dsl_dataset_t, ds_dirty_link)); txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, offsetof(dsl_sync_task_group_t, dstg_node)); list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), offsetof(dsl_dataset_t, ds_synced_link)); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); dp->dp_iput_taskq = taskq_create("zfs_iput_taskq", 1, minclsyspri, 1, 4, 0); return (dp); } int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dsl_dir_t *dd; dsl_dataset_t *ds; uint64_t obj; rw_enter(&dp->dp_config_rwlock, RW_WRITER); err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &dp->dp_meta_objset); if (err) goto out; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); if (err) goto out; if (spa_version(spa) >= SPA_VERSION_ORIGIN) { err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj, FTAG, &ds); if (err == 0) { err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, dp, &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } dsl_dir_close(dd, dp); if (err) goto out; } if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir); if (err) goto out; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj); if (err == ENOENT) err = 0; if (err) goto out; err = dsl_scan_init(dp, txg); out: rw_exit(&dp->dp_config_rwlock); if (err) dsl_pool_close(dp); else *dpp = dp; return (err); } void dsl_pool_close(dsl_pool_t *dp) { /* drop our references from dsl_pool_open() */ /* * Since we held the origin_snap from "syncing" context (which * includes pool-opening context), it actually only got a "ref" * and not a hold, so just drop that here. */ if (dp->dp_origin_snap) dsl_dataset_drop_ref(dp->dp_origin_snap, dp); if (dp->dp_mos_dir) dsl_dir_close(dp->dp_mos_dir, dp); if (dp->dp_free_dir) dsl_dir_close(dp->dp_free_dir, dp); if (dp->dp_root_dir) dsl_dir_close(dp->dp_root_dir, dp); bpobj_close(&dp->dp_free_bpobj); /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ if (dp->dp_meta_objset) dmu_objset_evict(dp->dp_meta_objset); txg_list_destroy(&dp->dp_dirty_datasets); txg_list_destroy(&dp->dp_sync_tasks); txg_list_destroy(&dp->dp_dirty_dirs); list_destroy(&dp->dp_synced_datasets); arc_flush(dp->dp_spa); txg_fini(dp); dsl_scan_fini(dp); rw_destroy(&dp->dp_config_rwlock); mutex_destroy(&dp->dp_lock); taskq_destroy(dp->dp_iput_taskq); if (dp->dp_blkstats) kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); kmem_free(dp, sizeof (dsl_pool_t)); } dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); objset_t *os; dsl_dataset_t *ds; uint64_t obj; /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT3U(err, ==, 0); /* Initialize scan structures */ VERIFY3U(0, ==, dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY(0 == dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY(0 == dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); VERIFY(NULL != (os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx))); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif dsl_dataset_rele(ds, FTAG); dmu_tx_commit(tx); return (dp); } static int deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_deadlist_t *dl = arg; + dsl_pool_t *dp = dmu_objset_pool(dl->dl_os); + rw_enter(&dp->dp_config_rwlock, RW_READER); dsl_deadlist_insert(dl, bp, tx); + rw_exit(&dp->dp_config_rwlock); return (0); } void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) { zio_t *zio; dmu_tx_t *tx; dsl_dir_t *dd; dsl_dataset_t *ds; dsl_sync_task_group_t *dstg; objset_t *mos = dp->dp_meta_objset; hrtime_t start, write_time; uint64_t data_written; int err; /* * We need to copy dp_space_towrite() before doing * dsl_sync_task_group_sync(), because * dsl_dataset_snapshot_reserve_space() will increase * dp_space_towrite but not actually write anything. */ data_written = dp->dp_space_towrite[txg & TXG_MASK]; tx = dmu_tx_create_assigned(dp, txg); dp->dp_read_overhead = 0; start = gethrtime(); zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg))) { /* * We must not sync any non-MOS datasets twice, because * we may have taken a snapshot of them. However, we * may sync newly-created datasets on pass 2. */ ASSERT(!list_link_active(&ds->ds_synced_link)); list_insert_tail(&dp->dp_synced_datasets, ds); dsl_dataset_sync(ds, zio, tx); } DTRACE_PROBE(pool_sync__1setup); err = zio_wait(zio); write_time = gethrtime() - start; ASSERT(err == 0); DTRACE_PROBE(pool_sync__2rootzio); for (ds = list_head(&dp->dp_synced_datasets); ds; ds = list_next(&dp->dp_synced_datasets, ds)) dmu_objset_do_userquota_updates(ds->ds_objset, tx); /* * Sync the datasets again to push out the changes due to * userspace updates. This must be done before we process the * sync tasks, because that could cause a snapshot of a dataset * whose ds_bp will be rewritten when we do this 2nd sync. */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg))) { ASSERT(list_link_active(&ds->ds_synced_link)); dmu_buf_rele(ds->ds_dbuf, ds); dsl_dataset_sync(ds, zio, tx); } err = zio_wait(zio); /* * Move dead blocks from the pending deadlist to the on-disk * deadlist. */ for (ds = list_head(&dp->dp_synced_datasets); ds; ds = list_next(&dp->dp_synced_datasets, ds)) { bplist_iterate(&ds->ds_pending_deadlist, deadlist_enqueue_cb, &ds->ds_deadlist, tx); } while ((dstg = txg_list_remove(&dp->dp_sync_tasks, txg))) { /* * No more sync tasks should have been added while we * were syncing. */ ASSERT(spa_sync_pass(dp->dp_spa) == 1); dsl_sync_task_group_sync(dstg, tx); } DTRACE_PROBE(pool_sync__3task); start = gethrtime(); while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg))) dsl_dir_sync(dd, tx); write_time += gethrtime() - start; start = gethrtime(); if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dmu_objset_sync(mos, zio, tx); err = zio_wait(zio); ASSERT(err == 0); dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); } write_time += gethrtime() - start; DTRACE_PROBE2(pool_sync__4io, hrtime_t, write_time, hrtime_t, dp->dp_read_overhead); write_time -= dp->dp_read_overhead; dmu_tx_commit(tx); dp->dp_space_towrite[txg & TXG_MASK] = 0; ASSERT(dp->dp_tempreserved[txg & TXG_MASK] == 0); /* * If the write limit max has not been explicitly set, set it * to a fraction of available physical memory (default 1/8th). * Note that we must inflate the limit because the spa * inflates write sizes to account for data replication. * Check this each sync phase to catch changing memory size. */ if (physmem != old_physmem && zfs_write_limit_shift) { mutex_enter(&zfs_write_limit_lock); old_physmem = physmem; zfs_write_limit_max = ptob(physmem) >> zfs_write_limit_shift; zfs_write_limit_inflated = MAX(zfs_write_limit_min, spa_get_asize(dp->dp_spa, zfs_write_limit_max)); mutex_exit(&zfs_write_limit_lock); } /* * Attempt to keep the sync time consistent by adjusting the * amount of write traffic allowed into each transaction group. * Weight the throughput calculation towards the current value: * thru = 3/4 old_thru + 1/4 new_thru * * Note: write_time is in nanosecs, so write_time/MICROSEC * yields millisecs */ ASSERT(zfs_write_limit_min > 0); if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) { uint64_t throughput = data_written / (write_time / MICROSEC); if (dp->dp_throughput) dp->dp_throughput = throughput / 4 + 3 * dp->dp_throughput / 4; else dp->dp_throughput = throughput; dp->dp_write_limit = MIN(zfs_write_limit_inflated, MAX(zfs_write_limit_min, dp->dp_throughput * zfs_txg_synctime_ms)); } } void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) { dsl_dataset_t *ds; objset_t *os; while ((ds = list_head(&dp->dp_synced_datasets))) { list_remove(&dp->dp_synced_datasets, ds); os = ds->ds_objset; zil_clean(os->os_zil, txg); ASSERT(!dmu_objset_is_dirty(os, txg)); dmu_buf_rele(ds->ds_dbuf, ds); } ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); } /* * TRUE if the current thread is the tx_sync_thread or if we * are being called from SPA context during pool initialization. */ int dsl_pool_sync_context(dsl_pool_t *dp) { return (curthread == dp->dp_tx.tx_sync_thread || spa_get_dsl(dp->dp_spa) == NULL); } uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) { uint64_t space, resv; /* * Reserve about 1.6% (1/64), or at least 32MB, for allocation * efficiency. * XXX The intent log is not accounted for, so it must fit * within this slop. * * If we're trying to assess whether it's OK to do a free, * cut the reservation in half to allow forward progress * (e.g. make it possible to rm(1) files from a full pool). */ space = spa_get_dspace(dp->dp_spa); resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1); if (netfree) resv >>= 1; return (space - resv); } int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx) { uint64_t reserved = 0; uint64_t write_limit = (zfs_write_limit_override ? zfs_write_limit_override : dp->dp_write_limit); if (zfs_no_write_throttle) { atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space); return (0); } /* * Check to see if we have exceeded the maximum allowed IO for * this transaction group. We can do this without locks since * a little slop here is ok. Note that we do the reserved check * with only half the requested reserve: this is because the * reserve requests are worst-case, and we really don't want to * throttle based off of worst-case estimates. */ if (write_limit > 0) { reserved = dp->dp_space_towrite[tx->tx_txg & TXG_MASK] + dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2; if (reserved && reserved > write_limit) { DMU_TX_STAT_BUMP(dmu_tx_write_limit); return (ERESTART); } } atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space); /* * If this transaction group is over 7/8ths capacity, delay * the caller 1 clock tick. This will slow down the "fill" * rate until the sync process can catch up with us. */ if (reserved && reserved > (write_limit - (write_limit >> 3))) txg_delay(dp, tx->tx_txg, 1); return (0); } void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) { ASSERT(dp->dp_tempreserved[tx->tx_txg & TXG_MASK] >= space); atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], -space); } void dsl_pool_memory_pressure(dsl_pool_t *dp) { uint64_t space_inuse = 0; int i; if (dp->dp_write_limit == zfs_write_limit_min) return; for (i = 0; i < TXG_SIZE; i++) { space_inuse += dp->dp_space_towrite[i]; space_inuse += dp->dp_tempreserved[i]; } dp->dp_write_limit = MAX(zfs_write_limit_min, MIN(dp->dp_write_limit, space_inuse / 4)); } void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) { if (space > 0) { mutex_enter(&dp->dp_lock); dp->dp_space_towrite[tx->tx_txg & TXG_MASK] += space; mutex_exit(&dp->dp_lock); } } /* ARGSUSED */ static int upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds, *prev = NULL; int err; dsl_pool_t *dp = spa_get_dsl(spa); err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); if (err) return (err); while (ds->ds_phys->ds_prev_snap_obj != 0) { err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) break; dsl_dataset_rele(ds, FTAG); ds = prev; prev = NULL; } if (prev == NULL) { prev = dp->dp_origin_snap; /* * The $ORIGIN can't have any data, or the accounting * will be wrong. */ ASSERT(prev->ds_phys->ds_bp.blk_birth == 0); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { dsl_dataset_rele(ds, FTAG); return (0); } dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_prev_snap_obj = prev->ds_object; ds->ds_phys->ds_prev_snap_txg = prev->ds_phys->ds_creation_txg; dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); ds->ds_dir->dd_phys->dd_origin_obj = prev->ds_object; dmu_buf_will_dirty(prev->ds_dbuf, tx); prev->ds_phys->ds_num_children++; if (ds->ds_phys->ds_next_snap_obj == 0) { ASSERT(ds->ds_prev == NULL); VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); } } ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object); ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object); if (prev->ds_phys->ds_next_clones_obj == 0) { dmu_buf_will_dirty(prev->ds_dbuf, tx); prev->ds_phys->ds_next_clones_obj = zap_create(dp->dp_meta_objset, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY(0 == zap_add_int(dp->dp_meta_objset, prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx)); dsl_dataset_rele(ds, FTAG); if (prev != dp->dp_origin_snap) dsl_dataset_rele(prev, FTAG); return (0); } void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap != NULL); VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb, tx, DS_FIND_CHILDREN)); } /* ARGSUSED */ static int upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds; dsl_pool_t *dp = spa_get_dsl(spa); objset_t *mos = dp->dp_meta_objset; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); if (ds->ds_dir->dd_phys->dd_origin_obj) { dsl_dataset_t *origin; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin)); if (origin->ds_dir->dd_phys->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); origin->ds_dir->dd_phys->dd_clones = zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); dsl_dataset_rele(origin, FTAG); } dsl_dataset_rele(ds, FTAG); return (0); } void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) { uint64_t obj; ASSERT(dmu_tx_is_syncing(tx)); (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY(0 == dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* * We can't use bpobj_alloc(), because spa_version() still * returns the old version, and we need a new-version bpobj with * subobj support. So call dmu_object_alloc() directly. */ obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN)); } void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) { uint64_t dsobj; dsl_dataset_t *ds; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap == NULL); /* create the origin dir, ds, & snap-ds */ rw_enter(&dp->dp_config_rwlock, RW_WRITER); dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, NULL, 0, kcred, tx); VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx); VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, dp, &dp->dp_origin_snap)); dsl_dataset_rele(ds, FTAG); rw_exit(&dp->dp_config_rwlock); } taskq_t * dsl_pool_iput_taskq(dsl_pool_t *dp) { return (dp->dp_iput_taskq); } /* * Walk through the pool-wide zap object of temporary snapshot user holds * and release them. */ void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) { zap_attribute_t za; zap_cursor_t zc; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; if (zapobj == 0) return; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); for (zap_cursor_init(&zc, mos, zapobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { char *htag; uint64_t dsobj; htag = strchr(za.za_name, '-'); *htag = '\0'; ++htag; dsobj = strtonum(za.za_name, NULL); (void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE); } zap_cursor_fini(&zc); } /* * Create the pool-wide zap object for storing temporary snapshot holds. */ void dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; ASSERT(dp->dp_tmp_userrefs_obj == 0); ASSERT(dmu_tx_is_syncing(tx)); dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0); } static int dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding) { objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; char *name; int error; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); ASSERT(dmu_tx_is_syncing(tx)); /* * If the pool was created prior to SPA_VERSION_USERREFS, the * zap object for temporary holds might not exist yet. */ if (zapobj == 0) { if (holding) { dsl_pool_user_hold_create_obj(dp, tx); zapobj = dp->dp_tmp_userrefs_obj; } else { return (ENOENT); } } name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); if (holding) error = zap_add(mos, zapobj, name, 8, 1, now, tx); else error = zap_remove(mos, zapobj, name, tx); strfree(name); return (error); } /* * Add a temporary hold for the given dataset object and tag. */ int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, uint64_t *now, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); } /* * Release a temporary hold for the given dataset object and tag. */ int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL, tx, B_FALSE)); } #if defined(_KERNEL) && defined(HAVE_SPL) module_param(zfs_no_write_throttle, int, 0644); MODULE_PARM_DESC(zfs_no_write_throttle, "Disable write throttling"); module_param(zfs_write_limit_shift, int, 0444); MODULE_PARM_DESC(zfs_write_limit_shift, "log2(fraction of memory) per txg"); module_param(zfs_txg_synctime_ms, int, 0644); MODULE_PARM_DESC(zfs_txg_synctime_ms, "Target milliseconds between tgx sync"); module_param(zfs_write_limit_min, ulong, 0444); MODULE_PARM_DESC(zfs_write_limit_min, "Min tgx write limit"); module_param(zfs_write_limit_max, ulong, 0444); MODULE_PARM_DESC(zfs_write_limit_max, "Max tgx write limit"); module_param(zfs_write_limit_inflated, ulong, 0444); MODULE_PARM_DESC(zfs_write_limit_inflated, "Inflated tgx write limit"); module_param(zfs_write_limit_override, ulong, 0444); MODULE_PARM_DESC(zfs_write_limit_override, "Override tgx write limit"); #endif diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 243f2b4ab370..7a25378752f8 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -1,513 +1,514 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include "zfs_comutil.h" #ifdef _KERNEL #include #endif /* * Routines to manage the on-disk history log. * * The history log is stored as a dmu object containing * tuples. * * Where "record nvlist" is a nvlist containing uint64_ts and strings, and * "packed record length" is the packed length of the "record nvlist" stored * as a little endian uint64_t. * * The log is implemented as a ring buffer, though the original creation * of the pool ('zpool create') is never overwritten. * * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer * of 'spa_history' stores the offsets for logging/retrieving history as * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of * where the 'zpool create' record is stored. This allows us to never * overwrite the original creation of the pool. 'sh_phys_max_off' is the * physical ending offset in bytes of the log. This tells you the length of * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record * is added, 'sh_eof' is incremented by the the size of the record. * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). * This is where the consumer should start reading from after reading in * the 'zpool create' portion of the log. * * 'sh_records_lost' keeps track of how many records have been overwritten * and permanently lost. */ /* convert a logical offset to physical */ static uint64_t spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp) { uint64_t phys_len; phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len; return ((log_off - shpp->sh_pool_create_len) % phys_len + shpp->sh_pool_create_len); } void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx) { dmu_buf_t *dbp; spa_history_phys_t *shpp; objset_t *mos = spa->spa_meta_objset; ASSERT(spa->spa_history == 0); spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY, SPA_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS, sizeof (spa_history_phys_t), tx); VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, sizeof (uint64_t), 1, &spa->spa_history, tx) == 0); VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); ASSERT(dbp->db_size >= sizeof (spa_history_phys_t)); shpp = dbp->db_data; dmu_buf_will_dirty(dbp, tx); /* * Figure out maximum size of history log. We set it at - * 1% of pool size, with a max of 32MB and min of 128KB. + * 0.1% of pool size, with a max of 1G and min of 128KB. */ shpp->sh_phys_max_off = - metaslab_class_get_dspace(spa_normal_class(spa)) / 100; - shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 32<<20); + metaslab_class_get_dspace(spa_normal_class(spa)) / 1000; + shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30); shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10); dmu_buf_rele(dbp, FTAG); } /* * Change 'sh_bof' to the beginning of the next record. */ static int spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp) { objset_t *mos = spa->spa_meta_objset; uint64_t firstread, reclen, phys_bof; char buf[sizeof (reclen)]; int err; phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp); firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof); if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread, buf, DMU_READ_PREFETCH)) != 0) return (err); if (firstread != sizeof (reclen)) { if ((err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len, sizeof (reclen) - firstread, buf + firstread, DMU_READ_PREFETCH)) != 0) return (err); } reclen = LE_64(*((uint64_t *)buf)); shpp->sh_bof += reclen + sizeof (reclen); shpp->sh_records_lost++; return (0); } static int spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, dmu_tx_t *tx) { uint64_t firstwrite, phys_eof; objset_t *mos = spa->spa_meta_objset; int err; ASSERT(MUTEX_HELD(&spa->spa_history_lock)); /* see if we need to reset logical BOF */ while (shpp->sh_phys_max_off - shpp->sh_pool_create_len - (shpp->sh_eof - shpp->sh_bof) <= len) { if ((err = spa_history_advance_bof(spa, shpp)) != 0) { return (err); } } phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); shpp->sh_eof += len; dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); len -= firstwrite; if (len > 0) { /* write out the rest at the beginning of physical file */ dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, len, (char *)buf + firstwrite, tx); } return (0); } static char * spa_history_zone(void) { #ifdef _KERNEL #ifdef HAVE_SPL return ("linux"); #else return (curproc->p_zone->zone_name); #endif #else return ("global"); #endif } /* * Write out a history event. */ /*ARGSUSED*/ static void spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; history_arg_t *hap = arg2; const char *history_str = hap->ha_history_str; objset_t *mos = spa->spa_meta_objset; dmu_buf_t *dbp; spa_history_phys_t *shpp; size_t reclen; uint64_t le_len; nvlist_t *nvrecord; char *record_packed = NULL; int ret; /* * If we have an older pool that doesn't have a command * history object, create it now. */ mutex_enter(&spa->spa_history_lock); if (!spa->spa_history) spa_history_create_obj(spa, tx); mutex_exit(&spa->spa_history_lock); /* * Get the offset of where we need to write via the bonus buffer. * Update the offset when the write completes. */ VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); shpp = dbp->db_data; dmu_buf_will_dirty(dbp, tx); #ifdef ZFS_DEBUG { dmu_object_info_t doi; dmu_object_info_from_db(dbp, &doi); ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); } #endif VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, gethrestime_sec()) == 0); VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); if (hap->ha_zone != NULL) VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, hap->ha_zone) == 0); #ifdef _KERNEL VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, utsname.nodename) == 0); #endif if (hap->ha_log_type == LOG_CMD_POOL_CREATE || hap->ha_log_type == LOG_CMD_NORMAL) { VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, history_str) == 0); zfs_dbgmsg("command: %s", history_str); } else { VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, hap->ha_event) == 0); VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, tx->tx_txg) == 0); VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, history_str) == 0); zfs_dbgmsg("internal %s pool:%s txg:%llu %s", zfs_history_event_names[hap->ha_event], spa_name(spa), (longlong_t)tx->tx_txg, history_str); } VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0); record_packed = kmem_alloc(reclen, KM_SLEEP); VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, NV_ENCODE_XDR, KM_SLEEP) == 0); mutex_enter(&spa->spa_history_lock); if (hap->ha_log_type == LOG_CMD_POOL_CREATE) VERIFY(shpp->sh_eof == shpp->sh_pool_create_len); /* write out the packed length as little endian */ le_len = LE_64((uint64_t)reclen); ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx); if (!ret) ret = spa_history_write(spa, record_packed, reclen, shpp, tx); if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { shpp->sh_pool_create_len += sizeof (le_len) + reclen; shpp->sh_bof = shpp->sh_pool_create_len; } mutex_exit(&spa->spa_history_lock); nvlist_free(nvrecord); kmem_free(record_packed, reclen); dmu_buf_rele(dbp, FTAG); strfree(hap->ha_history_str); if (hap->ha_zone != NULL) strfree(hap->ha_zone); kmem_free(hap, sizeof (history_arg_t)); } /* * Write out a history event. */ int spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) { history_arg_t *ha; int err = 0; dmu_tx_t *tx; ASSERT(what != LOG_INTERNAL); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); ha->ha_history_str = strdup(history_str); ha->ha_zone = strdup(spa_history_zone()); ha->ha_log_type = what; ha->ha_uid = crgetuid(CRED()); /* Kick this off asynchronously; errors are ignored. */ dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, spa_history_log_sync, spa, ha, 0, tx); dmu_tx_commit(tx); /* spa_history_log_sync will free ha and strings */ return (err); } /* * Read out the command history. */ int spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) { objset_t *mos = spa->spa_meta_objset; dmu_buf_t *dbp; uint64_t read_len, phys_read_off, phys_eof; uint64_t leftover = 0; spa_history_phys_t *shpp; int err; /* * If the command history doesn't exist (older pool), * that's ok, just return ENOENT. */ if (!spa->spa_history) return (ENOENT); /* * The history is logged asynchronously, so when they request * the first chunk of history, make sure everything has been * synced to disk so that we get it. */ if (*offp == 0 && spa_writeable(spa)) txg_wait_synced(spa_get_dsl(spa), 0); if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0) return (err); shpp = dbp->db_data; #ifdef ZFS_DEBUG { dmu_object_info_t doi; dmu_object_info_from_db(dbp, &doi); ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); } #endif mutex_enter(&spa->spa_history_lock); phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); if (*offp < shpp->sh_pool_create_len) { /* read in just the zpool create history */ phys_read_off = *offp; read_len = MIN(*len, shpp->sh_pool_create_len - phys_read_off); } else { /* * Need to reset passed in offset to BOF if the passed in * offset has since been overwritten. */ *offp = MAX(*offp, shpp->sh_bof); phys_read_off = spa_history_log_to_phys(*offp, shpp); /* * Read up to the minimum of what the user passed down or * the EOF (physical or logical). If we hit physical EOF, * use 'leftover' to read from the physical BOF. */ if (phys_read_off <= phys_eof) { read_len = MIN(*len, phys_eof - phys_read_off); } else { read_len = MIN(*len, shpp->sh_phys_max_off - phys_read_off); if (phys_read_off + *len > shpp->sh_phys_max_off) { leftover = MIN(*len - read_len, phys_eof - shpp->sh_pool_create_len); } } } /* offset for consumer to use next */ *offp += read_len + leftover; /* tell the consumer how much you actually read */ *len = read_len + leftover; if (read_len == 0) { mutex_exit(&spa->spa_history_lock); dmu_buf_rele(dbp, FTAG); return (0); } err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf, DMU_READ_PREFETCH); if (leftover && err == 0) { err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len, leftover, buf + read_len, DMU_READ_PREFETCH); } mutex_exit(&spa->spa_history_lock); dmu_buf_rele(dbp, FTAG); return (err); } static void log_internal(history_internal_events_t event, spa_t *spa, dmu_tx_t *tx, const char *fmt, va_list adx) { history_arg_t *ha; va_list adx_copy; /* * If this is part of creating a pool, not everything is * initialized yet, so don't bother logging the internal events. */ if (tx->tx_txg == TXG_INITIAL) return; ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); va_copy(adx_copy, adx); ha->ha_history_str = kmem_vasprintf(fmt, adx_copy); va_end(adx_copy); ha->ha_log_type = LOG_INTERNAL; ha->ha_event = event; ha->ha_zone = NULL; ha->ha_uid = 0; if (dmu_tx_is_syncing(tx)) { spa_history_log_sync(spa, ha, tx); } else { dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, spa_history_log_sync, spa, ha, 0, tx); } /* spa_history_log_sync() will free ha and strings */ } void spa_history_log_internal(history_internal_events_t event, spa_t *spa, dmu_tx_t *tx, const char *fmt, ...) { dmu_tx_t *htx = tx; va_list adx; /* create a tx if we didn't get one */ if (tx == NULL) { htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); if (dmu_tx_assign(htx, TXG_WAIT) != 0) { dmu_tx_abort(htx); return; } } va_start(adx, fmt); log_internal(event, spa, htx, fmt, adx); va_end(adx); /* if we didn't get a tx from the caller, commit the one we made */ if (tx == NULL) dmu_tx_commit(htx); } void spa_history_log_version(spa_t *spa, history_internal_events_t event) { #ifdef _KERNEL uint64_t current_vers = spa_version(spa); if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { spa_history_log_internal(event, spa, NULL, "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, utsname.nodename, utsname.release, utsname.version, utsname.machine); } cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", event == LOG_POOL_IMPORT ? "imported" : event == LOG_POOL_CREATE ? "created" : "accessed", (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif } #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(spa_history_create_obj); EXPORT_SYMBOL(spa_history_get); EXPORT_SYMBOL(spa_history_log); EXPORT_SYMBOL(spa_history_log_internal); EXPORT_SYMBOL(spa_history_log_version); #endif diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 49aad2a3b031..bd3d4a8d8547 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -1,1499 +1,1500 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #endif static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); uint64_t zap_getflags(zap_t *zap) { if (zap->zap_ismicro) return (0); return (zap->zap_u.zap_fat.zap_phys->zap_flags); } int zap_hashbits(zap_t *zap) { if (zap_getflags(zap) & ZAP_FLAG_HASH64) return (48); else return (28); } uint32_t zap_maxcd(zap_t *zap) { if (zap_getflags(zap) & ZAP_FLAG_HASH64) return ((1<<16)-1); else return (-1U); } static uint64_t zap_hash(zap_name_t *zn) { zap_t *zap = zn->zn_zap; uint64_t h = 0; if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); h = *(uint64_t *)zn->zn_key_orig; } else { h = zap->zap_salt; ASSERT(h != 0); ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { int i; const uint64_t *wp = zn->zn_key_norm; ASSERT(zn->zn_key_intlen == 8); for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { int j; uint64_t word = *wp; for (j = 0; j < zn->zn_key_intlen; j++) { h = (h >> 8) ^ zfs_crc64_table[(h ^ word) & 0xFF]; word >>= NBBY; } } } else { int i, len; const uint8_t *cp = zn->zn_key_norm; /* * We previously stored the terminating null on * disk, but didn't hash it, so we need to * continue to not hash it. (The * zn_key_*_numints includes the terminating * null for non-binary keys.) */ len = zn->zn_key_norm_numints - 1; ASSERT(zn->zn_key_intlen == 1); for (i = 0; i < len; cp++, i++) { h = (h >> 8) ^ zfs_crc64_table[(h ^ *cp) & 0xFF]; } } } /* * Don't use all 64 bits, since we need some in the cookie for * the collision differentiator. We MUST use the high bits, * since those are the ones that we first pay attention to when * chosing the bucket. */ h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); return (h); } static int zap_normalize(zap_t *zap, const char *name, char *namenorm) { size_t inlen, outlen; int err; ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); inlen = strlen(name) + 1; outlen = ZAP_MAXNAMELEN; err = 0; (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); return (err); } boolean_t zap_match(zap_name_t *zn, const char *matchname) { ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); if (zn->zn_matchtype == MT_FIRST) { char norm[ZAP_MAXNAMELEN]; if (zap_normalize(zn->zn_zap, matchname, norm) != 0) return (B_FALSE); return (strcmp(zn->zn_key_norm, norm) == 0); } else { /* MT_BEST or MT_EXACT */ return (strcmp(zn->zn_key_orig, matchname) == 0); } } void zap_name_free(zap_name_t *zn) { kmem_free(zn, sizeof (zap_name_t)); } zap_name_t * zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) { zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); zn->zn_zap = zap; zn->zn_key_intlen = sizeof (*key); zn->zn_key_orig = key; zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; zn->zn_matchtype = mt; if (zap->zap_normflags) { if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { zap_name_free(zn); return (NULL); } zn->zn_key_norm = zn->zn_normbuf; zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; } else { if (mt != MT_EXACT) { zap_name_free(zn); return (NULL); } zn->zn_key_norm = zn->zn_key_orig; zn->zn_key_norm_numints = zn->zn_key_orig_numints; } zn->zn_hash = zap_hash(zn); return (zn); } zap_name_t * zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) { zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); ASSERT(zap->zap_normflags == 0); zn->zn_zap = zap; zn->zn_key_intlen = sizeof (*key); zn->zn_key_orig = zn->zn_key_norm = key; zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; zn->zn_matchtype = MT_EXACT; zn->zn_hash = zap_hash(zn); return (zn); } static void mzap_byteswap(mzap_phys_t *buf, size_t size) { int i, max; buf->mz_block_type = BSWAP_64(buf->mz_block_type); buf->mz_salt = BSWAP_64(buf->mz_salt); buf->mz_normflags = BSWAP_64(buf->mz_normflags); max = (size / MZAP_ENT_LEN) - 1; for (i = 0; i < max; i++) { buf->mz_chunk[i].mze_value = BSWAP_64(buf->mz_chunk[i].mze_value); buf->mz_chunk[i].mze_cd = BSWAP_32(buf->mz_chunk[i].mze_cd); } } void zap_byteswap(void *buf, size_t size) { uint64_t block_type; block_type = *(uint64_t *)buf; if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { /* ASSERT(magic == ZAP_LEAF_MAGIC); */ mzap_byteswap(buf, size); } else { fzap_byteswap(buf, size); } } static int mze_compare(const void *arg1, const void *arg2) { const mzap_ent_t *mze1 = arg1; const mzap_ent_t *mze2 = arg2; if (mze1->mze_hash > mze2->mze_hash) return (+1); if (mze1->mze_hash < mze2->mze_hash) return (-1); if (mze1->mze_cd > mze2->mze_cd) return (+1); if (mze1->mze_cd < mze2->mze_cd) return (-1); return (0); } static void mze_insert(zap_t *zap, int chunkid, uint64_t hash) { mzap_ent_t *mze; ASSERT(zap->zap_ismicro); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); mze->mze_chunkid = chunkid; mze->mze_hash = hash; mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); avl_add(&zap->zap_m.zap_avl, mze); } static mzap_ent_t * mze_find(zap_name_t *zn) { mzap_ent_t mze_tofind; mzap_ent_t *mze; avl_index_t idx; avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; ASSERT(zn->zn_zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); mze_tofind.mze_hash = zn->zn_hash; mze_tofind.mze_cd = 0; again: mze = avl_find(avl, &mze_tofind, &idx); if (mze == NULL) mze = avl_nearest(avl, idx, AVL_AFTER); for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) return (mze); } if (zn->zn_matchtype == MT_BEST) { zn->zn_matchtype = MT_FIRST; goto again; } return (NULL); } static uint32_t mze_find_unused_cd(zap_t *zap, uint64_t hash) { mzap_ent_t mze_tofind; mzap_ent_t *mze; avl_index_t idx; avl_tree_t *avl = &zap->zap_m.zap_avl; uint32_t cd; ASSERT(zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); mze_tofind.mze_hash = hash; mze_tofind.mze_cd = 0; cd = 0; for (mze = avl_find(avl, &mze_tofind, &idx); mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { if (mze->mze_cd != cd) break; cd++; } return (cd); } static void mze_remove(zap_t *zap, mzap_ent_t *mze) { ASSERT(zap->zap_ismicro); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); avl_remove(&zap->zap_m.zap_avl, mze); kmem_free(mze, sizeof (mzap_ent_t)); } static void mze_destroy(zap_t *zap) { mzap_ent_t *mze; void *avlcookie = NULL; while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))) kmem_free(mze, sizeof (mzap_ent_t)); avl_destroy(&zap->zap_m.zap_avl); } static zap_t * mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) { zap_t *winner; zap_t *zap; int i; ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL); rw_enter(&zap->zap_rwlock, RW_WRITER); zap->zap_objset = os; zap->zap_object = obj; zap->zap_dbuf = db; if (*(uint64_t *)db->db_data != ZBT_MICRO) { mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; } else { zap->zap_ismicro = TRUE; } /* * Make sure that zap_ismicro is set before we let others see * it, because zap_lockdir() checks zap_ismicro without the lock * held. */ winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); if (winner != NULL) { rw_exit(&zap->zap_rwlock); rw_destroy(&zap->zap_rwlock); if (!zap->zap_ismicro) mutex_destroy(&zap->zap_f.zap_num_entries_mtx); kmem_free(zap, sizeof (zap_t)); return (winner); } if (zap->zap_ismicro) { zap->zap_salt = zap->zap_m.zap_phys->mz_salt; zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; avl_create(&zap->zap_m.zap_avl, mze_compare, sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; if (mze->mze_name[0]) { zap_name_t *zn; zap->zap_m.zap_num_entries++; zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); mze_insert(zap, i, zn->zn_hash); zap_name_free(zn); } } } else { zap->zap_salt = zap->zap_f.zap_phys->zap_salt; zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags; ASSERT3U(sizeof (struct zap_leaf_header), ==, 2*ZAP_LEAF_CHUNKSIZE); /* * The embedded pointer table should not overlap the * other members. */ ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, &zap->zap_f.zap_phys->zap_salt); /* * The embedded pointer table should end at the end of * the block */ ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 1<zap_f.zap_phys, ==, zap->zap_dbuf->db_size); } rw_exit(&zap->zap_rwlock); return (zap); } int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) { zap_t *zap; dmu_buf_t *db; krw_t lt; int err; *zapp = NULL; err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); if (err) return (err); #ifdef ZFS_DEBUG { dmu_object_info_t doi; dmu_object_info_from_db(db, &doi); ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); } #endif zap = dmu_buf_get_user(db); if (zap == NULL) zap = mzap_open(os, obj, db); /* * We're checking zap_ismicro without the lock held, in order to * tell what type of lock we want. Once we have some sort of * lock, see if it really is the right type. In practice this * can only be different if it was upgraded from micro to fat, * and micro wanted WRITER but fat only needs READER. */ lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; rw_enter(&zap->zap_rwlock, lt); if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { /* it was upgraded, now we only need reader */ ASSERT(lt == RW_WRITER); ASSERT(RW_READER == (!zap->zap_ismicro && fatreader) ? RW_READER : lti); rw_downgrade(&zap->zap_rwlock); lt = RW_READER; } zap->zap_objset = os; if (lt == RW_WRITER) dmu_buf_will_dirty(db, tx); ASSERT3P(zap->zap_dbuf, ==, db); ASSERT(!zap->zap_ismicro || zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); if (zap->zap_ismicro && tx && adding && zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; if (newsz > MZAP_MAX_BLKSZ) { dprintf("upgrading obj %llu: num_entries=%u\n", obj, zap->zap_m.zap_num_entries); *zapp = zap; return (mzap_upgrade(zapp, tx, 0)); } err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); ASSERT3U(err, ==, 0); zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; } *zapp = zap; return (0); } void zap_unlockdir(zap_t *zap) { rw_exit(&zap->zap_rwlock); dmu_buf_rele(zap->zap_dbuf, NULL); } static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) { mzap_phys_t *mzp; int i, sz, nchunks; int err = 0; zap_t *zap = *zapp; ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); sz = zap->zap_dbuf->db_size; mzp = vmem_alloc(sz, KM_SLEEP); bcopy(zap->zap_dbuf->db_data, mzp, sz); nchunks = zap->zap_m.zap_num_chunks; if (!flags) { err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 1ULL << fzap_default_block_shift, 0, tx); if (err) { vmem_free(mzp, sz); return (err); } } dprintf("upgrading obj=%llu with %u chunks\n", zap->zap_object, nchunks); /* XXX destroy the avl later, so we can use the stored hash value */ mze_destroy(zap); fzap_upgrade(zap, tx, flags); for (i = 0; i < nchunks; i++) { mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; zap_name_t *zn; if (mze->mze_name[0] == 0) continue; dprintf("adding %s=%llu\n", mze->mze_name, mze->mze_value); zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); zap = zn->zn_zap; /* fzap_add_cd() may change zap */ zap_name_free(zn); if (err) break; } vmem_free(mzp, sz); *zapp = zap; return (err); } static void mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, dmu_tx_t *tx) { dmu_buf_t *db; mzap_phys_t *zp; VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); #ifdef ZFS_DEBUG { dmu_object_info_t doi; dmu_object_info_from_db(db, &doi); ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); } #endif dmu_buf_will_dirty(db, tx); zp = db->db_data; zp->mz_block_type = ZBT_MICRO; zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; zp->mz_normflags = normflags; dmu_buf_rele(db, FTAG); if (flags != 0) { zap_t *zap; /* Only fat zap supports flags; upgrade immediately. */ VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, B_FALSE, B_FALSE, &zap)); VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); zap_unlockdir(zap); } } int zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { return (zap_create_claim_norm(os, obj, 0, ot, bonustype, bonuslen, tx)); } int zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { int err; err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); if (err != 0) return (err); mzap_create_impl(os, obj, normflags, 0, tx); return (0); } uint64_t zap_create(objset_t *os, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); } uint64_t zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); mzap_create_impl(os, obj, normflags, 0, tx); return (obj); } uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && leaf_blockshift <= SPA_MAXBLOCKSHIFT && indirect_blockshift >= SPA_MINBLOCKSHIFT && indirect_blockshift <= SPA_MAXBLOCKSHIFT); VERIFY(dmu_object_set_blocksize(os, obj, 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); mzap_create_impl(os, obj, normflags, flags, tx); return (obj); } int zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) { /* * dmu_object_free will free the object number and free the * data. Freeing the data will cause our pageout function to be * called, which will destroy our data (zap_leaf_t's and zap_t). */ return (dmu_object_free(os, zapobj, tx)); } _NOTE(ARGSUSED(0)) void zap_evict(dmu_buf_t *db, void *vzap) { zap_t *zap = vzap; rw_destroy(&zap->zap_rwlock); if (zap->zap_ismicro) mze_destroy(zap); else mutex_destroy(&zap->zap_f.zap_num_entries_mtx); kmem_free(zap, sizeof (zap_t)); } int zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) { zap_t *zap; int err; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); if (!zap->zap_ismicro) { err = fzap_count(zap, count); } else { *count = zap->zap_m.zap_num_entries; } zap_unlockdir(zap); return (err); } /* * zn may be NULL; if not specified, it will be computed if needed. * See also the comment above zap_entry_normalization_conflict(). */ static boolean_t mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) { mzap_ent_t *other; int direction = AVL_BEFORE; boolean_t allocdzn = B_FALSE; if (zap->zap_normflags == 0) return (B_FALSE); again: for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); other && other->mze_hash == mze->mze_hash; other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { if (zn == NULL) { zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, MT_FIRST); allocdzn = B_TRUE; } if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { if (allocdzn) zap_name_free(zn); return (B_TRUE); } } if (direction == AVL_BEFORE) { direction = AVL_AFTER; goto again; } if (allocdzn) zap_name_free(zn); return (B_FALSE); } /* * Routines for manipulating attributes. */ int zap_lookup(objset_t *os, uint64_t zapobj, const char *name, uint64_t integer_size, uint64_t num_integers, void *buf) { return (zap_lookup_norm(os, zapobj, name, integer_size, num_integers, buf, MT_EXACT, NULL, 0, NULL)); } int zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, uint64_t integer_size, uint64_t num_integers, void *buf, matchtype_t mt, char *realname, int rn_len, boolean_t *ncp) { zap_t *zap; int err; mzap_ent_t *mze; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc(zap, name, mt); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_lookup(zn, integer_size, num_integers, buf, realname, rn_len, ncp); } else { mze = mze_find(zn); if (mze == NULL) { err = ENOENT; } else { if (num_integers < 1) { err = EOVERFLOW; } else if (integer_size != 8) { err = EINVAL; } else { *(uint64_t *)buf = MZE_PHYS(zap, mze)->mze_value; (void) strlcpy(realname, MZE_PHYS(zap, mze)->mze_name, rn_len); if (ncp) { *ncp = mzap_normalization_conflict(zap, zn, mze); } } } } zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } fzap_prefetch(zn); zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_lookup(zn, integer_size, num_integers, buf, NULL, 0, NULL); zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_contains(objset_t *os, uint64_t zapobj, const char *name) { int err = (zap_lookup_norm(os, zapobj, name, 0, 0, NULL, MT_EXACT, NULL, 0, NULL)); if (err == EOVERFLOW || err == EINVAL) err = 0; /* found, but skipped reading the value */ return (err); } int zap_length(objset_t *os, uint64_t zapobj, const char *name, uint64_t *integer_size, uint64_t *num_integers) { zap_t *zap; int err; mzap_ent_t *mze; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc(zap, name, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_length(zn, integer_size, num_integers); } else { mze = mze_find(zn); if (mze == NULL) { err = ENOENT; } else { if (integer_size) *integer_size = 8; if (num_integers) *num_integers = 1; } } zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, uint64_t *integer_size, uint64_t *num_integers) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_length(zn, integer_size, num_integers); zap_name_free(zn); zap_unlockdir(zap); return (err); } static void mzap_addent(zap_name_t *zn, uint64_t value) { int i; zap_t *zap = zn->zn_zap; int start = zap->zap_m.zap_alloc_next; uint32_t cd; ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); #ifdef ZFS_DEBUG for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { ASSERTV(mzap_ent_phys_t *mze=&zap->zap_m.zap_phys->mz_chunk[i]); ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); } #endif cd = mze_find_unused_cd(zap, zn->zn_hash); /* given the limited size of the microzap, this can't happen */ ASSERT(cd < zap_maxcd(zap)); again: for (i = start; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; if (mze->mze_name[0] == 0) { mze->mze_value = value; mze->mze_cd = cd; (void) strcpy(mze->mze_name, zn->zn_key_orig); zap->zap_m.zap_num_entries++; zap->zap_m.zap_alloc_next = i+1; if (zap->zap_m.zap_alloc_next == zap->zap_m.zap_num_chunks) zap->zap_m.zap_alloc_next = 0; mze_insert(zap, i, zn->zn_hash); return; } } if (start != 0) { start = 0; goto again; } ASSERT(!"out of entries!"); } int zap_add(objset_t *os, uint64_t zapobj, const char *key, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; int err; mzap_ent_t *mze; const uint64_t *intval = val; zap_name_t *zn; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); zn = zap_name_alloc(zap, key, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ } else if (integer_size != 8 || num_integers != 1 || strlen(key) >= MZAP_NAME_LEN) { err = mzap_upgrade(&zn->zn_zap, tx, 0); if (err == 0) err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ } else { mze = mze_find(zn); if (mze != NULL) { err = EEXIST; } else { mzap_addent(zn, *intval); } } ASSERT(zap == zn->zn_zap); zap_name_free(zn); if (zap != NULL) /* may be NULL if fzap_add() failed */ zap_unlockdir(zap); return (err); } int zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ zap_name_free(zn); if (zap != NULL) /* may be NULL if fzap_add() failed */ zap_unlockdir(zap); return (err); } int zap_update(objset_t *os, uint64_t zapobj, const char *name, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; mzap_ent_t *mze; const uint64_t *intval = val; zap_name_t *zn; int err; #ifdef ZFS_DEBUG uint64_t oldval; /* * If there is an old value, it shouldn't change across the * lockdir (eg, due to bprewrite's xlation). */ if (integer_size == 8 && num_integers == 1) (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); #endif err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); zn = zap_name_alloc(zap, name, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_update(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_update() may change zap */ } else if (integer_size != 8 || num_integers != 1 || strlen(name) >= MZAP_NAME_LEN) { dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", zapobj, integer_size, num_integers, name); err = mzap_upgrade(&zn->zn_zap, tx, 0); if (err == 0) err = fzap_update(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_update() may change zap */ } else { mze = mze_find(zn); if (mze != NULL) { ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); MZE_PHYS(zap, mze)->mze_value = *intval; } else { mzap_addent(zn, *intval); } } ASSERT(zap == zn->zn_zap); zap_name_free(zn); if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ zap_unlockdir(zap); return (err); } int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; zap_name_t *zn; int err; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_update(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_update() may change zap */ zap_name_free(zn); if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ zap_unlockdir(zap); return (err); } int zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) { return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); } int zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, matchtype_t mt, dmu_tx_t *tx) { zap_t *zap; int err; mzap_ent_t *mze; zap_name_t *zn; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc(zap, name, mt); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_remove(zn, tx); } else { mze = mze_find(zn); if (mze == NULL) { err = ENOENT; } else { zap->zap_m.zap_num_entries--; bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], sizeof (mzap_ent_phys_t)); mze_remove(zap, mze); } } zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, dmu_tx_t *tx) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_remove(zn, tx); zap_name_free(zn); zap_unlockdir(zap); return (err); } /* * Routines for iterating over the attributes. */ void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, uint64_t serialized) { zc->zc_objset = os; zc->zc_zap = NULL; zc->zc_leaf = NULL; zc->zc_zapobj = zapobj; zc->zc_serialized = serialized; zc->zc_hash = 0; zc->zc_cd = 0; } void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) { zap_cursor_init_serialized(zc, os, zapobj, 0); } void zap_cursor_fini(zap_cursor_t *zc) { if (zc->zc_zap) { rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); zap_unlockdir(zc->zc_zap); zc->zc_zap = NULL; } if (zc->zc_leaf) { rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); zap_put_leaf(zc->zc_leaf); zc->zc_leaf = NULL; } zc->zc_objset = NULL; } uint64_t zap_cursor_serialize(zap_cursor_t *zc) { if (zc->zc_hash == -1ULL) return (-1ULL); if (zc->zc_zap == NULL) return (zc->zc_serialized); ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); /* * We want to keep the high 32 bits of the cursor zero if we can, so * that 32-bit programs can access this. So usually use a small * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits * of the cursor. * * [ collision differentiator | zap_hashbits()-bit hash value ] */ return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); } int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) { int err; avl_index_t idx; mzap_ent_t mze_tofind; mzap_ent_t *mze; if (zc->zc_hash == -1ULL) return (ENOENT); if (zc->zc_zap == NULL) { int hb; err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, RW_READER, TRUE, FALSE, &zc->zc_zap); if (err) return (err); /* * To support zap_cursor_init_serialized, advance, retrieve, * we must add to the existing zc_cd, which may already * be 1 due to the zap_cursor_advance. */ ASSERT(zc->zc_hash == 0); hb = zap_hashbits(zc->zc_zap); zc->zc_hash = zc->zc_serialized << (64 - hb); zc->zc_cd += zc->zc_serialized >> hb; if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ zc->zc_cd = 0; } else { rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); } if (!zc->zc_zap->zap_ismicro) { err = fzap_cursor_retrieve(zc->zc_zap, zc, za); } else { err = ENOENT; mze_tofind.mze_hash = zc->zc_hash; mze_tofind.mze_cd = zc->zc_cd; mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); if (mze == NULL) { mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, idx, AVL_AFTER); } if (mze) { mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); za->za_normalization_conflict = mzap_normalization_conflict(zc->zc_zap, NULL, mze); za->za_integer_length = 8; za->za_num_integers = 1; za->za_first_integer = mzep->mze_value; (void) strcpy(za->za_name, mzep->mze_name); zc->zc_hash = mze->mze_hash; zc->zc_cd = mze->mze_cd; err = 0; } else { zc->zc_hash = -1ULL; } } rw_exit(&zc->zc_zap->zap_rwlock); return (err); } void zap_cursor_advance(zap_cursor_t *zc) { if (zc->zc_hash == -1ULL) return; zc->zc_cd++; } int zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) { int err = 0; mzap_ent_t *mze; zap_name_t *zn; if (zc->zc_zap == NULL) { err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, RW_READER, TRUE, FALSE, &zc->zc_zap); if (err) return (err); } else { rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); } zn = zap_name_alloc(zc->zc_zap, name, mt); if (zn == NULL) { rw_exit(&zc->zc_zap->zap_rwlock); return (ENOTSUP); } if (!zc->zc_zap->zap_ismicro) { err = fzap_cursor_move_to_key(zc, zn); } else { mze = mze_find(zn); if (mze == NULL) { err = ENOENT; goto out; } zc->zc_hash = mze->mze_hash; zc->zc_cd = mze->mze_cd; } out: zap_name_free(zn); rw_exit(&zc->zc_zap->zap_rwlock); return (err); } int zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) { int err; zap_t *zap; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); bzero(zs, sizeof (zap_stats_t)); if (zap->zap_ismicro) { zs->zs_blocksize = zap->zap_dbuf->db_size; zs->zs_num_entries = zap->zap_m.zap_num_entries; zs->zs_num_blocks = 1; } else { fzap_get_stats(zap, zs); } zap_unlockdir(zap); return (0); } int zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, uint64_t *towrite, uint64_t *tooverwrite) { zap_t *zap; int err = 0; /* * Since, we don't have a name, we cannot figure out which blocks will * be affected in this operation. So, account for the worst case : * - 3 blocks overwritten: target leaf, ptrtbl block, header block * - 4 new blocks written if adding: * - 2 blocks for possibly split leaves, * - 2 grown ptrtbl blocks * * This also accomodates the case where an add operation to a fairly * large microzap results in a promotion to fatzap. */ if (name == NULL) { *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; return (err); } /* - * We lock the zap with adding == FALSE. Because, if we pass + * We lock the zap with adding == FALSE. Because, if we pass * the actual value of add, it could trigger a mzap_upgrade(). * At present we are just evaluating the possibility of this operation * and hence we donot want to trigger an upgrade. */ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); if (!zap->zap_ismicro) { zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); if (zn) { err = fzap_count_write(zn, add, towrite, tooverwrite); zap_name_free(zn); } else { /* * We treat this case as similar to (name == NULL) */ *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; } } else { /* * We are here if (name != NULL) and this is a micro-zap. * We account for the header block depending on whether it * is freeable. * * Incase of an add-operation it is hard to find out * if this add will promote this microzap to fatzap. * Hence, we consider the worst case and account for the * blocks assuming this microzap would be promoted to a * fatzap. * * 1 block overwritten : header block * 4 new blocks written : 2 new split leaf, 2 grown * ptrtbl blocks */ if (dmu_buf_freeable(zap->zap_dbuf)) *tooverwrite += SPA_MAXBLOCKSIZE; else *towrite += SPA_MAXBLOCKSIZE; if (add) { *towrite += 4 * SPA_MAXBLOCKSIZE; } } zap_unlockdir(zap); return (err); } #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(zap_create); EXPORT_SYMBOL(zap_create_norm); EXPORT_SYMBOL(zap_create_flags); EXPORT_SYMBOL(zap_create_claim); EXPORT_SYMBOL(zap_create_claim_norm); EXPORT_SYMBOL(zap_destroy); EXPORT_SYMBOL(zap_lookup); EXPORT_SYMBOL(zap_lookup_norm); EXPORT_SYMBOL(zap_lookup_uint64); EXPORT_SYMBOL(zap_contains); EXPORT_SYMBOL(zap_prefetch_uint64); EXPORT_SYMBOL(zap_count_write); EXPORT_SYMBOL(zap_add); EXPORT_SYMBOL(zap_add_uint64); EXPORT_SYMBOL(zap_update); EXPORT_SYMBOL(zap_update_uint64); EXPORT_SYMBOL(zap_length); EXPORT_SYMBOL(zap_length_uint64); EXPORT_SYMBOL(zap_remove); EXPORT_SYMBOL(zap_remove_norm); EXPORT_SYMBOL(zap_remove_uint64); EXPORT_SYMBOL(zap_count); EXPORT_SYMBOL(zap_value_search); EXPORT_SYMBOL(zap_join); EXPORT_SYMBOL(zap_join_increment); EXPORT_SYMBOL(zap_add_int); EXPORT_SYMBOL(zap_remove_int); EXPORT_SYMBOL(zap_lookup_int); EXPORT_SYMBOL(zap_increment_int); EXPORT_SYMBOL(zap_add_int_key); EXPORT_SYMBOL(zap_lookup_int_key); EXPORT_SYMBOL(zap_increment); EXPORT_SYMBOL(zap_cursor_init); EXPORT_SYMBOL(zap_cursor_fini); EXPORT_SYMBOL(zap_cursor_retrieve); EXPORT_SYMBOL(zap_cursor_advance); EXPORT_SYMBOL(zap_cursor_serialize); EXPORT_SYMBOL(zap_cursor_move_to_key); EXPORT_SYMBOL(zap_cursor_init_serialized); EXPORT_SYMBOL(zap_get_stats); #endif diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 65b0a1975294..94c91e876bd6 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1,5074 +1,5182 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 Martin Matuska * Portions Copyright 2012 Pawel Jakub Dawidek * Copyright (c) 2012, Joyent, Inc. All rights reserved. - */ -/* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_deleg.h" #include "zfs_comutil.h" kmutex_t zfsdev_state_lock; list_t zfsdev_state_list; extern void zfs_init(void); extern void zfs_fini(void); typedef int zfs_ioc_func_t(zfs_cmd_t *); typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); typedef enum { NO_NAME, POOL_NAME, DATASET_NAME } zfs_ioc_namecheck_t; typedef enum { POOL_CHECK_NONE = 1 << 0, POOL_CHECK_SUSPENDED = 1 << 1, POOL_CHECK_READONLY = 1 << 2 } zfs_ioc_poolcheck_t; typedef struct zfs_ioc_vec { zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; boolean_t zvec_his_log; zfs_ioc_poolcheck_t zvec_pool_check; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ static const char *userquota_perms[] = { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_PERM_GROUPQUOTA, }; static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc); static int zfs_check_settable(const char *name, nvpair_t *property, cred_t *cr); static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); static void history_str_free(char *buf) { kmem_free(buf, HIS_MAX_RECORD_LEN); } static char * history_str_get(zfs_cmd_t *zc) { char *buf; if (zc->zc_history == 0) return (NULL); buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP | KM_NODEBUG); if (copyinstr((void *)(uintptr_t)zc->zc_history, buf, HIS_MAX_RECORD_LEN, NULL) != 0) { history_str_free(buf); return (NULL); } buf[HIS_MAX_RECORD_LEN -1] = '\0'; return (buf); } /* * Check to see if the named dataset is currently defined as bootable */ static boolean_t zfs_is_bootfs(const char *name) { objset_t *os; if (dmu_objset_hold(name, FTAG, &os) == 0) { boolean_t ret; ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os))); dmu_objset_rele(os, FTAG); return (ret); } return (B_FALSE); } /* * zfs_earlier_version * * Return non-zero if the spa version is less than requested version. */ static int zfs_earlier_version(const char *name, int version) { spa_t *spa; if (spa_open(name, &spa, FTAG) == 0) { if (spa_version(spa) < version) { spa_close(spa, FTAG); return (1); } spa_close(spa, FTAG); } return (0); } /* * zpl_earlier_version * * Return TRUE if the ZPL version is less than requested version. */ static boolean_t zpl_earlier_version(const char *name, int version) { objset_t *os; boolean_t rc = B_TRUE; if (dmu_objset_hold(name, FTAG, &os) == 0) { uint64_t zplversion; if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (B_TRUE); } /* XXX reading from non-owned objset */ if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0) rc = zplversion < version; dmu_objset_rele(os, FTAG); } return (rc); } static void zfs_log_history(zfs_cmd_t *zc) { spa_t *spa; char *buf; if ((buf = history_str_get(zc)) == NULL) return; if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); spa_close(spa, FTAG); } history_str_free(buf); } /* * Policy for top-level read operations (list pools). Requires no privileges, * and can be used in the local zone, as there is no associated dataset. */ /* ARGSUSED */ static int zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) { return (0); } /* * Policy for dataset read operations (list children, get statistics). Requires * no privileges, but must be visible in the local zone. */ /* ARGSUSED */ static int zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) { if (INGLOBALZONE(curproc) || zone_dataset_visible(zc->zc_name, NULL)) return (0); return (ENOENT); } static int zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) { int writable = 1; /* * The dataset must be visible by this zone -- check this first * so they don't see EPERM on something they shouldn't know about. */ if (!INGLOBALZONE(curproc) && !zone_dataset_visible(dataset, &writable)) return (ENOENT); if (INGLOBALZONE(curproc)) { /* * If the fs is zoned, only root can access it from the * global zone. */ if (secpolicy_zfs(cr) && zoned) return (EPERM); } else { /* * If we are in a local zone, the 'zoned' property must be set. */ if (!zoned) return (EPERM); /* must be writable by this zone */ if (!writable) return (EPERM); } return (0); } static int zfs_dozonecheck(const char *dataset, cred_t *cr) { uint64_t zoned; if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) return (ENOENT); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } static int zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) { uint64_t zoned; rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) { rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); return (ENOENT); } rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } +/* + * If name ends in a '@', then require recursive permissions. + */ int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; + boolean_t descendent = B_FALSE; + dsl_dataset_t *ds; + char *at; + + at = strchr(name, '@'); + if (at != NULL && at[1] == '\0') { + *at = '\0'; + descendent = B_TRUE; + } + + error = dsl_dataset_hold(name, FTAG, &ds); + if (at != NULL) + *at = '@'; + if (error != 0) + return (error); - error = zfs_dozonecheck(name, cr); + error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access(name, perm, cr); + error = dsl_deleg_access_impl(ds, descendent, perm, cr); } + + dsl_dataset_rele(ds, FTAG); return (error); } int zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, const char *perm, cred_t *cr) { int error; error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, perm, cr); + error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); } return (error); } /* * Policy for setting the security label property. * * Returns 0 for success, non-zero for access and other errors. */ static int zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) { #ifdef HAVE_MLSLABEL char ds_hexsl[MAXNAMELEN]; bslabel_t ds_sl, new_sl; boolean_t new_default = FALSE; uint64_t zoned; int needed_priv = -1; int error; /* First get the existing dataset label. */ error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1, sizeof (ds_hexsl), &ds_hexsl, NULL); if (error) return (EPERM); if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) new_default = TRUE; /* The label must be translatable */ if (!new_default && (hexstr_to_label(strval, &new_sl) != 0)) return (EINVAL); /* * In a non-global zone, disallow attempts to set a label that * doesn't match that of the zone; otherwise no other checks * are needed. */ if (!INGLOBALZONE(curproc)) { if (new_default || !blequal(&new_sl, CR_SL(CRED()))) return (EPERM); return (0); } /* * For global-zone datasets (i.e., those whose zoned property is * "off", verify that the specified new label is valid for the * global zone. */ if (dsl_prop_get_integer(name, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) return (EPERM); if (!zoned) { if (zfs_check_global_label(name, strval) != 0) return (EPERM); } /* * If the existing dataset label is nondefault, check if the * dataset is mounted (label cannot be changed while mounted). * Get the zfs_sb_t; if there isn't one, then the dataset isn't * mounted (or isn't a dataset, doesn't exist, ...). */ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) { objset_t *os; static char *setsl_tag = "setsl_tag"; /* * Try to own the dataset; abort if there is any error, * (e.g., already mounted, in use, or other error). */ error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, setsl_tag, &os); if (error) return (EPERM); dmu_objset_disown(os, setsl_tag); if (new_default) { needed_priv = PRIV_FILE_DOWNGRADE_SL; goto out_check; } if (hexstr_to_label(strval, &new_sl) != 0) return (EPERM); if (blstrictdom(&ds_sl, &new_sl)) needed_priv = PRIV_FILE_DOWNGRADE_SL; else if (blstrictdom(&new_sl, &ds_sl)) needed_priv = PRIV_FILE_UPGRADE_SL; } else { /* dataset currently has a default label */ if (!new_default) needed_priv = PRIV_FILE_UPGRADE_SL; } out_check: if (needed_priv != -1) return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL)); return (0); #else return ENOTSUP; #endif /* HAVE_MLSLABEL */ } static int zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, cred_t *cr) { char *strval; /* * Check permissions for special properties. */ switch (prop) { default: break; case ZFS_PROP_ZONED: /* * Disallow setting of 'zoned' from within a local zone. */ if (!INGLOBALZONE(curproc)) return (EPERM); break; case ZFS_PROP_QUOTA: if (!INGLOBALZONE(curproc)) { uint64_t zoned; char setpoint[MAXNAMELEN]; /* * Unprivileged users are allowed to modify the * quota on things *under* (ie. contained by) * the thing they own. */ if (dsl_prop_get_integer(dsname, "zoned", &zoned, setpoint)) return (EPERM); if (!zoned || strlen(dsname) <= strlen(setpoint)) return (EPERM); } break; case ZFS_PROP_MLSLABEL: if (!is_system_labeled()) return (EPERM); if (nvpair_value_string(propval, &strval) == 0) { int err; err = zfs_set_slabel_policy(dsname, strval, CRED()); if (err != 0) return (err); } break; } return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } int zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) { int error; error = zfs_dozonecheck(zc->zc_name, cr); if (error) return (error); /* * permission to set permissions will be evaluated later in * dsl_deleg_can_allow() */ return (0); } int zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_ROLLBACK, cr)); } int zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) { spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; char *cp; int error; /* * Generate the current snapshot name from the given objsetid, then * use that name for the secpolicy/zone checks. */ cp = strchr(zc->zc_name, '@'); if (cp == NULL) return (EINVAL); error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); if (error) return (error); dsl_dataset_name(ds, zc->zc_name); error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, ZFS_DELEG_PERM_SEND, cr); dsl_dataset_rele(ds, FTAG); return (error); } #ifdef HAVE_SMB_SHARE static int zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) { vnode_t *vp; int error; if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp)) != 0) return (error); /* Now make sure mntpnt and dataset are ZFS */ if (vp->v_vfsp->vfs_fstype != zfsfstype || (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), zc->zc_name) != 0)) { VN_RELE(vp); return (EPERM); } VN_RELE(vp); return (dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_SHARE, cr)); } #endif /* HAVE_SMB_SHARE */ int zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) return (EPERM); if (secpolicy_nfs(cr) == 0) { return (0); } else { return (zfs_secpolicy_deleg_share(zc, cr)); } #else return (ENOTSUP); #endif /* HAVE_SMB_SHARE */ } int zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) return (EPERM); if (secpolicy_smb(cr) == 0) { return (0); } else { return (zfs_secpolicy_deleg_share(zc, cr)); } #else return (ENOTSUP); #endif /* HAVE_SMB_SHARE */ } static int zfs_get_parent(const char *datasetname, char *parent, int parentsize) { char *cp; /* * Remove the @bla or /bla from the end of the name to get the parent. */ (void) strncpy(parent, datasetname, parentsize); cp = strrchr(parent, '@'); if (cp != NULL) { cp[0] = '\0'; } else { cp = strrchr(parent, '/'); if (cp == NULL) return (ENOENT); cp[0] = '\0'; } return (0); } int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) { int error; if ((error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } static int zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires * descendent mount and destroy permissions. - * Reassemble the full filesystem@snap name so dsl_deleg_access() - * can do the correct permission check. - * - * Since this routine is used when doing a recursive destroy of snapshots - * and destroying snapshots requires descendent permissions, a successfull - * check of the top level snapshot applies to snapshots of all descendent - * datasets as well. - * - * The target snapshot may not exist when doing a recursive destroy. - * In this case fallback to permissions of the parent dataset. */ static int -zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) { int error; char *dsname; - dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); + dsname = kmem_asprintf("%s@", zc->zc_name); error = zfs_secpolicy_destroy_perms(dsname, cr); if (error == ENOENT) error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); strfree(dsname); return (error); } int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) { char parentname[MAXNAMELEN]; int error; if ((error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_RENAME, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); if ((error = zfs_get_parent(to, parentname, sizeof (parentname))) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (error); } static int zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); } static int zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) { char parentname[MAXNAMELEN]; objset_t *clone; int error; error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_PROMOTE, cr); if (error) return (error); error = dmu_objset_hold(zc->zc_name, FTAG, &clone); if (error == 0) { dsl_dataset_t *pclone = NULL; dsl_dir_t *dd; dd = clone->os_dsl_dataset->ds_dir; rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_origin_obj, FTAG, &pclone); rw_exit(&dd->dd_pool->dp_config_rwlock); if (error) { dmu_objset_rele(clone, FTAG); return (error); } error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); dsl_dataset_name(pclone, parentname); dmu_objset_rele(clone, FTAG); dsl_dataset_rele(pclone, FTAG); if (error == 0) error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_PROMOTE, cr); } return (error); } static int zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) { int error; if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RECEIVE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_CREATE, cr)); } int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) { return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_SNAPSHOT, cr)); } static int zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); } static int zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) { char parentname[MAXNAMELEN]; int error; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); if (zc->zc_value[0] != '\0') { if ((error = zfs_secpolicy_write_perms(zc->zc_value, ZFS_DELEG_PERM_CLONE, cr)) != 0) return (error); } if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_MOUNT, cr); return (error); } /* * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires * SYS_CONFIG privilege, which is not available in a local zone. */ /* ARGSUSED */ static int zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) { if (secpolicy_sys_config(cr, B_FALSE) != 0) return (EPERM); return (0); } /* * Policy for object to name lookups. */ /* ARGSUSED */ static int zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) { int error; if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0) return (0); error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr); return (error); } /* * Policy for fault injection. Requires all privileges. */ /* ARGSUSED */ static int zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) { return (secpolicy_zinject(cr)); } static int zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) { zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); if (prop == ZPROP_INVAL) { if (!zfs_prop_user(zc->zc_value)) return (EINVAL); return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_USERPROP, cr)); } else { return (zfs_secpolicy_setprop(zc->zc_name, prop, NULL, cr)); } } static int zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) { int err = zfs_secpolicy_read(zc, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); if (zc->zc_value[0] == 0) { /* * They are asking about a posix uid/gid. If it's * themself, allow it. */ if (zc->zc_objset_type == ZFS_PROP_USERUSED || zc->zc_objset_type == ZFS_PROP_USERQUOTA) { if (zc->zc_guid == crgetuid(cr)) return (0); } else { if (groupmember(zc->zc_guid, cr)) return (0); } } return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } static int zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) { int err = zfs_secpolicy_read(zc, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } static int zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } static int zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_HOLD, cr)); } static int zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RELEASE, cr)); } /* * Policy for allowing temporary snapshots to be taken or released */ static int zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, * hold, destroy and release all rolled into one. * Delegated diff alone is sufficient that we allow this. */ int error; if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); error = zfs_secpolicy_snapshot(zc, cr); if (!error) error = zfs_secpolicy_hold(zc, cr); if (!error) error = zfs_secpolicy_release(zc, cr); if (!error) error = zfs_secpolicy_destroy(zc, cr); return (error); } /* * Returns the nvlist as specified by the user in the zfs_cmd_t. */ static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) { char *packed; int error; nvlist_t *list = NULL; /* * Read in and unpack the user-supplied nvlist. */ if (size == 0) return (EINVAL); packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag)) != 0) { kmem_free(packed, size); return (error); } if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) { kmem_free(packed, size); return (error); } kmem_free(packed, size); *nvp = list; return (0); } static int fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) { size_t size; VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); if (size > zc->zc_nvlist_dst_size) { nvpair_t *more_errors; int n = 0; if (zc->zc_nvlist_dst_size < 1024) return (ENOMEM); VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); more_errors = nvlist_prev_nvpair(*errors, NULL); do { nvpair_t *pair = nvlist_prev_nvpair(*errors, more_errors); VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); n++; VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); } while (size > zc->zc_nvlist_dst_size); VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); ASSERT(size <= zc->zc_nvlist_dst_size); } return (0); } static int put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) { char *packed = NULL; int error = 0; size_t size; VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); if (size > zc->zc_nvlist_dst_size) { error = ENOMEM; } else { packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, KM_SLEEP) == 0); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) error = EFAULT; kmem_free(packed, size); } zc->zc_nvlist_dst_size = size; return (error); } static int get_zfs_sb(const char *dsname, zfs_sb_t **zsbp) { objset_t *os; int error; error = dmu_objset_hold(dsname, FTAG, &os); if (error) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } mutex_enter(&os->os_user_ptr_lock); *zsbp = dmu_objset_get_user(os); if (*zsbp && (*zsbp)->z_sb) { atomic_inc(&((*zsbp)->z_sb->s_active)); } else { error = ESRCH; } mutex_exit(&os->os_user_ptr_lock); dmu_objset_rele(os, FTAG); return (error); } /* * Find a zfs_sb_t for a mounted filesystem, or create our own, in which * case its z_sb will be NULL, and it will be opened as the owner. */ static int zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer) { int error = 0; if (get_zfs_sb(name, zsbp) != 0) error = zfs_sb_create(name, zsbp); if (error == 0) { rrw_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER : RW_READER, tag); if ((*zsbp)->z_unmounted) { /* * XXX we could probably try again, since the unmounting * thread should be just about to disassociate the * objset from the zfsvfs. */ rrw_exit(&(*zsbp)->z_teardown_lock, tag); return (EBUSY); } } return (error); } static void zfs_sb_rele(zfs_sb_t *zsb, void *tag) { rrw_exit(&zsb->z_teardown_lock, tag); if (zsb->z_sb) { deactivate_super(zsb->z_sb); } else { dmu_objset_disown(zsb->z_os, zsb); zfs_sb_free(zsb); } } static int zfs_ioc_pool_create(zfs_cmd_t *zc) { int error; nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; char *buf; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) return (error); if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { nvlist_free(config); return (error); } if (props) { nvlist_t *nvl = NULL; uint64_t version = SPA_VERSION; (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version); if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) { error = EINVAL; goto pool_props_bad; } (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); if (nvl) { error = nvlist_dup(nvl, &rootprops, KM_SLEEP); if (error != 0) { nvlist_free(config); nvlist_free(props); return (error); } (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); if (error) goto pool_props_bad; } buf = history_str_get(zc); error = spa_create(zc->zc_name, config, props, buf, zplprops); /* * Set the remaining root properties */ if (!error && (error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) (void) spa_destroy(zc->zc_name); if (buf != NULL) history_str_free(buf); pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); return (error); } static int zfs_ioc_pool_destroy(zfs_cmd_t *zc) { int error; zfs_log_history(zc); error = spa_destroy(zc->zc_name); if (error == 0) zvol_remove_minors(zc->zc_name); return (error); } static int zfs_ioc_pool_import(zfs_cmd_t *zc) { nvlist_t *config, *props = NULL; uint64_t guid; int error; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) != 0) return (error); if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { nvlist_free(config); return (error); } if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || guid != zc->zc_guid) error = EINVAL; else error = spa_import(zc->zc_name, config, props, zc->zc_cookie); if (zc->zc_nvlist_dst != 0) { int err; if ((err = put_nvlist(zc, config)) != 0) error = err; } if (error == 0) zvol_create_minors(zc->zc_name); nvlist_free(config); if (props) nvlist_free(props); return (error); } static int zfs_ioc_pool_export(zfs_cmd_t *zc) { int error; boolean_t force = (boolean_t)zc->zc_cookie; boolean_t hardforce = (boolean_t)zc->zc_guid; zfs_log_history(zc); error = spa_export(zc->zc_name, NULL, force, hardforce); if (error == 0) zvol_remove_minors(zc->zc_name); return (error); } static int zfs_ioc_pool_configs(zfs_cmd_t *zc) { nvlist_t *configs; int error; if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) return (EEXIST); error = put_nvlist(zc, configs); nvlist_free(configs); return (error); } static int zfs_ioc_pool_stats(zfs_cmd_t *zc) { nvlist_t *config; int error; int ret = 0; error = spa_get_stats(zc->zc_name, &config, zc->zc_value, sizeof (zc->zc_value)); if (config != NULL) { ret = put_nvlist(zc, config); nvlist_free(config); /* * The config may be present even if 'error' is non-zero. * In this case we return success, and preserve the real errno * in 'zc_cookie'. */ zc->zc_cookie = error; } else { ret = error; } return (ret); } /* * Try to import the given pool, returning pool stats as appropriate so that * user land knows which devices are available and overall pool health. */ static int zfs_ioc_pool_tryimport(zfs_cmd_t *zc) { nvlist_t *tryconfig, *config; int error; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &tryconfig)) != 0) return (error); config = spa_tryimport(tryconfig); nvlist_free(tryconfig); if (config == NULL) return (EINVAL); error = put_nvlist(zc, config); nvlist_free(config); return (error); } /* * inputs: * zc_name name of the pool * zc_cookie scan func (pool_scan_func_t) */ static int zfs_ioc_pool_scan(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (zc->zc_cookie == POOL_SCAN_NONE) error = spa_scan_stop(spa); else error = spa_scan(spa, zc->zc_cookie); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_freeze(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error == 0) { spa_freeze(spa); spa_close(spa, FTAG); } return (error); } static int zfs_ioc_pool_upgrade(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) { spa_close(spa, FTAG); return (EINVAL); } spa_upgrade(spa, zc->zc_cookie); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_get_history(zfs_cmd_t *zc) { spa_t *spa; char *hist_buf; uint64_t size; int error; if ((size = zc->zc_history_len) == 0) return (EINVAL); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { spa_close(spa, FTAG); return (ENOTSUP); } hist_buf = vmem_alloc(size, KM_SLEEP); if ((error = spa_history_get(spa, &zc->zc_history_offset, &zc->zc_history_len, hist_buf)) == 0) { error = ddi_copyout(hist_buf, (void *)(uintptr_t)zc->zc_history, zc->zc_history_len, zc->zc_iflags); } spa_close(spa, FTAG); vmem_free(hist_buf, size); return (error); } static int zfs_ioc_pool_reguid(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error == 0) { error = spa_change_guid(spa); spa_close(spa, FTAG); } return (error); } static int zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) { int error; if ((error = dsl_dsobj_to_dsname(zc->zc_name,zc->zc_obj,zc->zc_value))) return (error); return (0); } /* * inputs: * zc_name name of filesystem * zc_obj object to find * * outputs: * zc_value name of object */ static int zfs_ioc_obj_to_path(zfs_cmd_t *zc) { objset_t *os; int error; /* XXX reading from objset not owned */ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, sizeof (zc->zc_value)); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_obj object to find * * outputs: * zc_stat stats on object * zc_value path to object */ static int zfs_ioc_obj_to_stats(zfs_cmd_t *zc) { objset_t *os; int error; /* XXX reading from objset not owned */ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, sizeof (zc->zc_value)); dmu_objset_rele(os, FTAG); return (error); } static int zfs_ioc_vdev_add(zfs_cmd_t *zc) { spa_t *spa; int error; nvlist_t *config, **l2cache, **spares; uint_t nl2cache = 0, nspares = 0; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config); (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache); (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES, &spares, &nspares); /* * A root pool with concatenated devices is not supported. * Thus, can not add a device to a root pool. * * Intent log device can not be added to a rootpool because * during mountroot, zil is replayed, a seperated log device * can not be accessed during the mountroot time. * * l2cache and spare devices are ok to be added to a rootpool. */ if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) { nvlist_free(config); spa_close(spa, FTAG); return (EDOM); } if (error == 0) { error = spa_vdev_add(spa, config); nvlist_free(config); } spa_close(spa, FTAG); return (error); } /* * inputs: * zc_name name of the pool * zc_nvlist_conf nvlist of devices to remove * zc_cookie to stop the remove? */ static int zfs_ioc_vdev_remove(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_set_state(zfs_cmd_t *zc) { spa_t *spa; int error; vdev_state_t newstate = VDEV_STATE_UNKNOWN; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); switch (zc->zc_cookie) { case VDEV_STATE_ONLINE: error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate); break; case VDEV_STATE_OFFLINE: error = vdev_offline(spa, zc->zc_guid, zc->zc_obj); break; case VDEV_STATE_FAULTED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && zc->zc_obj != VDEV_AUX_EXTERNAL) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_fault(spa, zc->zc_guid, zc->zc_obj); break; case VDEV_STATE_DEGRADED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && zc->zc_obj != VDEV_AUX_EXTERNAL) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj); break; default: error = EINVAL; } zc->zc_cookie = newstate; spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_attach(zfs_cmd_t *zc) { spa_t *spa; int replacing = zc->zc_cookie; nvlist_t *config; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) == 0) { error = spa_vdev_attach(spa, zc->zc_guid, config, replacing); nvlist_free(config); } spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_detach(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_split(zfs_cmd_t *zc) { spa_t *spa; nvlist_t *config, *props = NULL; int error; boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) { spa_close(spa, FTAG); return (error); } if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { spa_close(spa, FTAG); nvlist_free(config); return (error); } error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp); spa_close(spa, FTAG); nvlist_free(config); nvlist_free(props); return (error); } static int zfs_ioc_vdev_setpath(zfs_cmd_t *zc) { spa_t *spa; char *path = zc->zc_value; uint64_t guid = zc->zc_guid; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_setpath(spa, guid, path); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_setfru(zfs_cmd_t *zc) { spa_t *spa; char *fru = zc->zc_value; uint64_t guid = zc->zc_guid; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_setfru(spa, guid, fru); spa_close(spa, FTAG); return (error); } static int zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) { int error = 0; nvlist_t *nv; dmu_objset_fast_stat(os, &zc->zc_objset_stats); if (zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_all(os, &nv)) == 0) { dmu_objset_stats(os, nv); /* * NB: zvol_get_stats() will read the objset contents, * which we aren't supposed to do with a * DS_MODE_USER hold, because it could be * inconsistent. So this is a bit of a workaround... * XXX reading with out owning */ - if (!zc->zc_objset_stats.dds_inconsistent) { - if (dmu_objset_type(os) == DMU_OST_ZVOL) - error = zvol_get_stats(os, nv); + if (!zc->zc_objset_stats.dds_inconsistent && + dmu_objset_type(os) == DMU_OST_ZVOL) { + error = zvol_get_stats(os, nv); + if (error == EIO) + return (error); + VERIFY3S(error, ==, 0); } if (error == 0) error = put_nvlist(zc, nv); nvlist_free(nv); } return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_objset_stats(zfs_cmd_t *zc) { objset_t *os = NULL; int error; if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (error); error = zfs_ioc_objset_stats_impl(zc, os); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_nvlist_dst received property nvlist * zc_nvlist_dst_size size of received property nvlist * * Gets received properties (distinct from local properties on or after * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from * local property values. */ static int zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) { objset_t *os = NULL; int error; nvlist_t *nv; if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (error); /* * Without this check, we would return local property values if the * caller has not already received properties on or after * SPA_VERSION_RECVD_PROPS. */ if (!dsl_prop_get_hasrecvd(os)) { dmu_objset_rele(os, FTAG); return (ENOTSUP); } if (zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_received(os, &nv)) == 0) { error = put_nvlist(zc, nv); nvlist_free(nv); } dmu_objset_rele(os, FTAG); return (error); } static int nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop) { uint64_t value; int error; /* * zfs_get_zplprop() will either find a value or give us * the default value (if there is one). */ if ((error = zfs_get_zplprop(os, prop, &value)) != 0) return (error); VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0); return (0); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for zpl property nvlist * * outputs: * zc_nvlist_dst zpl property nvlist * zc_nvlist_dst_size size of zpl property nvlist */ static int zfs_ioc_objset_zplprops(zfs_cmd_t *zc) { objset_t *os; int err; /* XXX reading without owning */ if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (err); dmu_objset_fast_stat(os, &zc->zc_objset_stats); /* * NB: nvl_add_zplprop() will read the objset contents, * which we aren't supposed to do with a DS_MODE_USER * hold, because it could be inconsistent. */ if (zc->zc_nvlist_dst != 0 && !zc->zc_objset_stats.dds_inconsistent && dmu_objset_type(os) == DMU_OST_ZFS) { nvlist_t *nv; VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0) err = put_nvlist(zc, nv); nvlist_free(nv); } else { err = ENOENT; } dmu_objset_rele(os, FTAG); return (err); } static boolean_t dataset_name_hidden(const char *name) { /* * Skip over datasets that are not visible in this zone, * internal datasets (which have a $ in their name), and * temporary datasets (which have a % in their name). */ if (strchr(name, '$') != NULL) return (B_TRUE); if (strchr(name, '%') != NULL) return (B_TRUE); if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL)) return (B_TRUE); return (B_FALSE); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_name name of next filesystem * zc_cookie zap cursor * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_dataset_list_next(zfs_cmd_t *zc) { objset_t *os; int error; char *p; size_t orig_len = strlen(zc->zc_name); top: if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) { if (error == ENOENT) error = ESRCH; return (error); } p = strrchr(zc->zc_name, '/'); if (p == NULL || p[1] != '\0') (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); p = zc->zc_name + strlen(zc->zc_name); /* * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0 * but is not declared void because its called by dmu_objset_find(). */ if (zc->zc_cookie == 0) { uint64_t cookie = 0; int len = sizeof (zc->zc_name) - (p - zc->zc_name); while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { if (!dataset_name_hidden(zc->zc_name)) (void) dmu_objset_prefetch(zc->zc_name, NULL); } } do { error = dmu_dir_list_next(os, sizeof (zc->zc_name) - (p - zc->zc_name), p, NULL, &zc->zc_cookie); if (error == ENOENT) error = ESRCH; - } while (error == 0 && dataset_name_hidden(zc->zc_name) && - !(zc->zc_iflags & FKIOCTL)); + } while (error == 0 && dataset_name_hidden(zc->zc_name)); dmu_objset_rele(os, FTAG); /* * If it's an internal dataset (ie. with a '$' in its name), * don't try to get stats for it, otherwise we'll return ENOENT. */ if (error == 0 && strchr(zc->zc_name, '$') == NULL) { error = zfs_ioc_objset_stats(zc); /* fill in the stats */ if (error == ENOENT) { /* We lost a race with destroy, get the next one. */ zc->zc_name[orig_len] = '\0'; goto top; } } return (error); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_name name of next snapshot * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) { objset_t *os; int error; top: if (zc->zc_cookie == 0 && !zc->zc_simple) (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch, NULL, DS_FIND_SNAPSHOTS); error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error == ENOENT ? ESRCH : error); /* * A dataset name of maximum length cannot have any snapshots, * so exit immediately. */ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { dmu_objset_rele(os, FTAG); return (ESRCH); } error = dmu_snapshot_list_next(os, sizeof (zc->zc_name) - strlen(zc->zc_name), zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie, NULL); if (error == 0 && !zc->zc_simple) { dsl_dataset_t *ds; dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; /* * Since we probably don't have a hold on this snapshot, * it's possible that the objsetid could have been destroyed * and reused for a new objset. It's OK if this happens during * a zfs send operation, since the new createtxg will be * beyond the range we're interested in. */ rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); if (error) { if (error == ENOENT) { /* Racing with destroy, get the next one. */ *strchr(zc->zc_name, '@') = '\0'; dmu_objset_rele(os, FTAG); goto top; } } else { objset_t *ossnap; error = dmu_objset_from_ds(ds, &ossnap); if (error == 0) error = zfs_ioc_objset_stats_impl(zc, ossnap); dsl_dataset_rele(ds, FTAG); } } else if (error == ENOENT) { error = ESRCH; } dmu_objset_rele(os, FTAG); /* if we failed, undo the @ that we tacked on to zc_name */ if (error) *strchr(zc->zc_name, '@') = '\0'; return (error); } static int zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) { const char *propname = nvpair_name(pair); uint64_t *valary; unsigned int vallen; const char *domain; char *dash; zfs_userquota_prop_t type; uint64_t rid; uint64_t quota; zfs_sb_t *zsb; int err; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) != 0) return (EINVAL); } /* * A correctly constructed propname is encoded as * userquota@-. */ if ((dash = strchr(propname, '-')) == NULL || nvpair_value_uint64_array(pair, &valary, &vallen) != 0 || vallen != 3) return (EINVAL); domain = dash + 1; type = valary[0]; rid = valary[1]; quota = valary[2]; err = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); if (err == 0) { err = zfs_set_userquota(zsb, type, domain, rid, quota); zfs_sb_rele(zsb, FTAG); } return (err); } /* * If the named property is one that has a special function to set its value, * return 0 on success and a positive error code on failure; otherwise if it is * not one of the special properties handled by this function, return -1. * * XXX: It would be better for callers of the property interface if we handled * these special cases in dsl_prop.c (in the dsl layer). */ static int zfs_prop_set_special(const char *dsname, zprop_source_t source, nvpair_t *pair) { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; int err; if (prop == ZPROP_INVAL) { if (zfs_prop_userquota(propname)) return (zfs_prop_set_userquota(dsname, pair)); return (-1); } if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) == 0); } if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) return (-1); VERIFY(0 == nvpair_value_uint64(pair, &intval)); switch (prop) { case ZFS_PROP_QUOTA: err = dsl_dir_set_quota(dsname, source, intval); break; case ZFS_PROP_REFQUOTA: err = dsl_dataset_set_quota(dsname, source, intval); break; case ZFS_PROP_RESERVATION: err = dsl_dir_set_reservation(dsname, source, intval); break; case ZFS_PROP_REFRESERVATION: err = dsl_dataset_set_reservation(dsname, source, intval); break; case ZFS_PROP_VOLSIZE: err = zvol_set_volsize(dsname, intval); break; case ZFS_PROP_VERSION: { zfs_sb_t *zsb; if ((err = zfs_sb_hold(dsname, FTAG, &zsb, B_TRUE)) != 0) break; err = zfs_set_version(zsb, intval); zfs_sb_rele(zsb, FTAG); if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { zfs_cmd_t *zc; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); (void) strcpy(zc->zc_name, dsname); (void) zfs_ioc_userspace_upgrade(zc); kmem_free(zc, sizeof (zfs_cmd_t)); } break; } default: err = -1; } return (err); } /* * This function is best effort. If it fails to set any of the given properties, * it continues to set as many as it can and returns the first error * encountered. If the caller provides a non-NULL errlist, it also gives the * complete list of names of all the properties it failed to set along with the * corresponding error numbers. The caller is responsible for freeing the * returned errlist. * * If every property is set successfully, zero is returned and the list pointed * at by errlist is NULL. */ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, nvlist_t **errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; uint64_t intval; char *strval; nvlist_t *genericnvl; nvlist_t *errors; nvlist_t *retrynvl; VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); retry: pair = NULL; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); int err = 0; /* decode the property value */ propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) err = EINVAL; } /* Validate value type */ if (err == 0 && prop == ZPROP_INVAL) { if (zfs_prop_user(propname)) { if (nvpair_type(propval) != DATA_TYPE_STRING) err = EINVAL; } else if (zfs_prop_userquota(propname)) { if (nvpair_type(propval) != DATA_TYPE_UINT64_ARRAY) err = EINVAL; + } else { + err = EINVAL; } } else if (err == 0) { if (nvpair_type(propval) == DATA_TYPE_STRING) { if (zfs_prop_get_type(prop) != PROP_TYPE_STRING) err = EINVAL; } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; VERIFY(nvpair_value_uint64(propval, &intval) == 0); switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: break; case PROP_TYPE_STRING: err = EINVAL; break; case PROP_TYPE_INDEX: if (zfs_prop_index_to_string(prop, intval, &unused) != 0) err = EINVAL; break; default: cmn_err(CE_PANIC, "unknown property type"); } } else { err = EINVAL; } } /* Validate permissions */ if (err == 0) err = zfs_check_settable(dsname, pair, CRED()); if (err == 0) { err = zfs_prop_set_special(dsname, source, pair); if (err == -1) { /* * For better performance we build up a list of * properties to set in a single transaction. */ err = nvlist_add_nvpair(genericnvl, pair); } else if (err != 0 && nvl != retrynvl) { /* * This may be a spurious error caused by * receiving quota and reservation out of order. * Try again in a second pass. */ err = nvlist_add_nvpair(retrynvl, pair); } } if (err != 0) VERIFY(nvlist_add_int32(errors, propname, err) == 0); } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { nvl = retrynvl; goto retry; } if (!nvlist_empty(genericnvl) && dsl_props_set(dsname, source, genericnvl) != 0) { /* * If this fails, we still want to set as many properties as we * can, so try setting them individually. */ pair = NULL; while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) { const char *propname = nvpair_name(pair); int err = 0; propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) == 0); } if (nvpair_type(propval) == DATA_TYPE_STRING) { VERIFY(nvpair_value_string(propval, &strval) == 0); err = dsl_prop_set(dsname, propname, source, 1, strlen(strval) + 1, strval); } else { VERIFY(nvpair_value_uint64(propval, &intval) == 0); err = dsl_prop_set(dsname, propname, source, 8, 1, &intval); } if (err != 0) { VERIFY(nvlist_add_int32(errors, propname, err) == 0); } } } nvlist_free(genericnvl); nvlist_free(retrynvl); if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { nvlist_free(errors); errors = NULL; } else { VERIFY(nvpair_value_int32(pair, &rv) == 0); } if (errlist == NULL) nvlist_free(errors); else *errlist = errors; return (rv); } /* * Check that all the properties are valid user properties. */ static int zfs_check_userprops(char *fsname, nvlist_t *nvl) { nvpair_t *pair = NULL; int error = 0; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); char *valstr; if (!zfs_prop_user(propname) || nvpair_type(pair) != DATA_TYPE_STRING) return (EINVAL); if ((error = zfs_secpolicy_write_perms(fsname, ZFS_DELEG_PERM_USERPROP, CRED()))) return (error); if (strlen(propname) >= ZAP_MAXNAMELEN) return (ENAMETOOLONG); VERIFY(nvpair_value_string(pair, &valstr) == 0); if (strlen(valstr) >= ZAP_MAXVALUELEN) return (E2BIG); } return (0); } static void props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) { nvpair_t *pair; VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); pair = NULL; while ((pair = nvlist_next_nvpair(props, pair)) != NULL) { if (nvlist_exists(skipped, nvpair_name(pair))) continue; VERIFY(nvlist_add_nvpair(*newprops, pair) == 0); } } static int clear_received_props(objset_t *os, const char *fs, nvlist_t *props, nvlist_t *skipped) { int err = 0; nvlist_t *cleared_props = NULL; props_skip(props, skipped, &cleared_props); if (!nvlist_empty(cleared_props)) { /* * Acts on local properties until the dataset has received * properties at least once on or after SPA_VERSION_RECVD_PROPS. */ zprop_source_t flags = (ZPROP_SRC_NONE | (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0)); err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL); } nvlist_free(cleared_props); return (err); } /* * inputs: * zc_name name of filesystem * zc_value name of property to set * zc_nvlist_src{_size} nvlist of properties to apply * zc_cookie received properties flag * * outputs: * zc_nvlist_dst{_size} error for each unapplied received property */ static int zfs_ioc_set_prop(zfs_cmd_t *zc) { nvlist_t *nvl; boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); nvlist_t *errors = NULL; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvl)) != 0) return (error); if (received) { nvlist_t *origprops; objset_t *os; if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) { if (dsl_prop_get_received(os, &origprops) == 0) { (void) clear_received_props(os, zc->zc_name, origprops, nvl); nvlist_free(origprops); } dsl_prop_set_hasrecvd(os); dmu_objset_rele(os, FTAG); } } error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); } nvlist_free(errors); nvlist_free(nvl); return (error); } /* * inputs: * zc_name name of filesystem * zc_value name of property to inherit * zc_cookie revert to received value if TRUE * * outputs: none */ static int zfs_ioc_inherit_prop(zfs_cmd_t *zc) { const char *propname = zc->zc_value; zfs_prop_t prop = zfs_name_to_prop(propname); boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_NONE /* revert to received value, if any */ : ZPROP_SRC_INHERITED); /* explicitly inherit */ if (received) { nvlist_t *dummy; nvpair_t *pair; zprop_type_t type; int err; /* * zfs_prop_set_special() expects properties in the form of an * nvpair with type info. */ if (prop == ZPROP_INVAL) { if (!zfs_prop_user(propname)) return (EINVAL); type = PROP_TYPE_STRING; } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) { return (EINVAL); } else { type = zfs_prop_get_type(prop); } VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0); switch (type) { case PROP_TYPE_STRING: VERIFY(0 == nvlist_add_string(dummy, propname, "")); break; case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: VERIFY(0 == nvlist_add_uint64(dummy, propname, 0)); break; default: nvlist_free(dummy); return (EINVAL); } pair = nvlist_next_nvpair(dummy, NULL); err = zfs_prop_set_special(zc->zc_name, source, pair); nvlist_free(dummy); if (err != -1) return (err); /* special property already handled */ } else { /* * Only check this in the non-received case. We want to allow * 'inherit -S' to revert non-inheritable properties like quota * and reservation to the received or default values even though * they are not considered inheritable. */ if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop)) return (EINVAL); } /* the property name has been validated by zfs_secpolicy_inherit() */ return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); } static int zfs_ioc_pool_set_props(zfs_cmd_t *zc) { nvlist_t *props; spa_t *spa; int error; nvpair_t *pair; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) return (error); /* * If the only property is the configfile, then just do a spa_lookup() * to handle the faulted case. */ pair = nvlist_next_nvpair(props, NULL); if (pair != NULL && strcmp(nvpair_name(pair), zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 && nvlist_next_nvpair(props, pair) == NULL) { mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_config_sync(spa, B_FALSE, B_TRUE); } mutex_exit(&spa_namespace_lock); if (spa != NULL) { nvlist_free(props); return (0); } } if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { nvlist_free(props); return (error); } error = spa_prop_set(spa, props); nvlist_free(props); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_get_props(zfs_cmd_t *zc) { spa_t *spa; int error; nvlist_t *nvp = NULL; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { /* * If the pool is faulted, there may be properties we can still * get (such as altroot and cachefile), so attempt to get them * anyway. */ mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) error = spa_prop_get(spa, &nvp); mutex_exit(&spa_namespace_lock); } else { error = spa_prop_get(spa, &nvp); spa_close(spa, FTAG); } if (error == 0 && zc->zc_nvlist_dst != 0) error = put_nvlist(zc, nvp); else error = EFAULT; nvlist_free(nvp); return (error); } /* * inputs: * zc_name name of volume * * outputs: none */ static int zfs_ioc_create_minor(zfs_cmd_t *zc) { return (zvol_create_minor(zc->zc_name)); } /* * inputs: * zc_name name of volume * * outputs: none */ static int zfs_ioc_remove_minor(zfs_cmd_t *zc) { return (zvol_remove_minor(zc->zc_name)); } /* * inputs: * zc_name name of filesystem * zc_nvlist_src{_size} nvlist of delegated permissions * zc_perm_action allow/unallow flag * * outputs: none */ static int zfs_ioc_set_fsacl(zfs_cmd_t *zc) { int error; nvlist_t *fsaclnv = NULL; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &fsaclnv)) != 0) return (error); /* * Verify nvlist is constructed correctly */ if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) { nvlist_free(fsaclnv); return (EINVAL); } /* * If we don't have PRIV_SYS_MOUNT, then validate * that user is allowed to hand out each permission in * the nvlist(s) */ error = secpolicy_zfs(CRED()); if (error) { if (zc->zc_perm_action == B_FALSE) { error = dsl_deleg_can_allow(zc->zc_name, fsaclnv, CRED()); } else { error = dsl_deleg_can_unallow(zc->zc_name, fsaclnv, CRED()); } } if (error == 0) error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action); nvlist_free(fsaclnv); return (error); } /* * inputs: * zc_name name of filesystem * * outputs: * zc_nvlist_src{_size} nvlist of delegated permissions */ static int zfs_ioc_get_fsacl(zfs_cmd_t *zc) { nvlist_t *nvp; int error; if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) { error = put_nvlist(zc, nvp); nvlist_free(nvp); } return (error); } /* ARGSUSED */ static void zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) { zfs_creat_t *zct = arg; zfs_create_fs(os, cr, zct->zct_zplprops, tx); } #define ZFS_PROP_UNDEFINED ((uint64_t)-1) /* * inputs: * createprops list of properties requested by creator * default_zplver zpl version to use if unspecified in createprops * fuids_ok fuids allowed in this version of the spa? * os parent objset pointer (NULL if root fs) * * outputs: * zplprops values for the zplprops we attach to the master node object * is_ci true if requested file system will be purely case-insensitive * * Determine the settings for utf8only, normalization and * casesensitivity. Specific values may have been requested by the * creator and/or we can inherit values from the parent dataset. If * the file system is of too early a vintage, a creator can not * request settings for these properties, even if the requested * setting is the default value. We don't actually want to create dsl * properties for these, so remove them from the source nvlist after * processing. */ static int zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { uint64_t sense = ZFS_PROP_UNDEFINED; uint64_t norm = ZFS_PROP_UNDEFINED; uint64_t u8 = ZFS_PROP_UNDEFINED; int error; ASSERT(zplprops != NULL); /* * Pull out creator prop choices, if any. */ if (createprops) { (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_VERSION), &zplver); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE)); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_CASE), &sense); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_CASE)); } /* * If the zpl version requested is whacky or the file system * or pool is version is too "young" to support normalization * and the creator tried to set a value for one of the props, * error out. */ if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) || (zplver >= ZPL_VERSION_FUID && !fuids_ok) || (zplver >= ZPL_VERSION_SA && !sa_ok) || (zplver < ZPL_VERSION_NORMALIZATION && (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED || sense != ZFS_PROP_UNDEFINED))) return (ENOTSUP); /* * Put the version in the zplprops */ VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0); if (norm == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0); /* * If we're normalizing, names must always be valid UTF-8 strings. */ if (norm) u8 = 1; if (u8 == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0); if (sense == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0); if (is_ci) *is_ci = (sense == ZFS_CASE_INSENSITIVE); return (0); } static int zfs_fill_zplprops(const char *dataset, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { boolean_t fuids_ok, sa_ok; uint64_t zplver = ZPL_VERSION; objset_t *os = NULL; char parentname[MAXNAMELEN]; char *cp; spa_t *spa; uint64_t spa_vers; int error; (void) strlcpy(parentname, dataset, sizeof (parentname)); cp = strrchr(parentname, '/'); ASSERT(cp != NULL); cp[0] = '\0'; if ((error = spa_open(dataset, &spa, FTAG)) != 0) return (error); spa_vers = spa_version(spa); spa_close(spa, FTAG); zplver = zfs_zpl_version_map(spa_vers); fuids_ok = (zplver >= ZPL_VERSION_FUID); sa_ok = (zplver >= ZPL_VERSION_SA); /* * Open parent object set so we can inherit zplprop values. */ if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0) return (error); error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops, zplprops, is_ci); dmu_objset_rele(os, FTAG); return (error); } static int zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { boolean_t fuids_ok; boolean_t sa_ok; uint64_t zplver = ZPL_VERSION; int error; zplver = zfs_zpl_version_map(spa_vers); fuids_ok = (zplver >= ZPL_VERSION_FUID); sa_ok = (zplver >= ZPL_VERSION_SA); error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok, createprops, zplprops, is_ci); return (error); } /* * inputs: * zc_objset_type type of objset to create (fs vs zvol) * zc_name name of new objset * zc_value name of snapshot to clone from (may be empty) * zc_nvlist_src{_size} nvlist of properties to apply * * outputs: none */ static int zfs_ioc_create(zfs_cmd_t *zc) { objset_t *clone; int error = 0; zfs_creat_t zct; nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); dmu_objset_type_t type = zc->zc_objset_type; switch (type) { case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; case DMU_OST_ZVOL: cbfunc = zvol_create_cb; break; default: cbfunc = NULL; break; } if (strchr(zc->zc_name, '@') || strchr(zc->zc_name, '%')) return (EINVAL); if (zc->zc_nvlist_src != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvprops)) != 0) return (error); zct.zct_zplprops = NULL; zct.zct_props = nvprops; if (zc->zc_value[0] != '\0') { /* * We're creating a clone of an existing snapshot. */ zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { nvlist_free(nvprops); return (EINVAL); } error = dmu_objset_hold(zc->zc_value, FTAG, &clone); if (error) { nvlist_free(nvprops); return (error); } error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); dmu_objset_rele(clone, FTAG); if (error) { nvlist_free(nvprops); return (error); } } else { boolean_t is_insensitive = B_FALSE; if (cbfunc == NULL) { nvlist_free(nvprops); return (EINVAL); } if (type == DMU_OST_ZVOL) { uint64_t volsize, volblocksize; if (nvprops == NULL || nvlist_lookup_uint64(nvprops, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) { nvlist_free(nvprops); return (EINVAL); } if ((error = nvlist_lookup_uint64(nvprops, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize)) != 0 && error != ENOENT) { nvlist_free(nvprops); return (EINVAL); } if (error != 0) volblocksize = zfs_prop_default_numeric( ZFS_PROP_VOLBLOCKSIZE); if ((error = zvol_check_volblocksize( volblocksize)) != 0 || (error = zvol_check_volsize(volsize, volblocksize)) != 0) { nvlist_free(nvprops); return (error); } } else if (type == DMU_OST_ZFS) { int error; /* * We have to have normalization and * case-folding flags correct when we do the * file system creation, so go figure them out * now. */ VERIFY(nvlist_alloc(&zct.zct_zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops(zc->zc_name, nvprops, zct.zct_zplprops, &is_insensitive); if (error != 0) { nvlist_free(nvprops); nvlist_free(zct.zct_zplprops); return (error); } } error = dmu_objset_create(zc->zc_name, type, is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); nvlist_free(zct.zct_zplprops); } /* * It would be nice to do this atomically. */ if (error == 0) { error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, nvprops, NULL); if (error != 0) (void) dmu_objset_destroy(zc->zc_name, B_FALSE); } nvlist_free(nvprops); return (error); } /* * inputs: * zc_name name of filesystem * zc_value short name of snapshot * zc_cookie recursive flag * zc_nvlist_src[_size] property list * * outputs: * zc_value short snapname (i.e. part after the '@') */ static int zfs_ioc_snapshot(zfs_cmd_t *zc) { nvlist_t *nvprops = NULL; int error; boolean_t recursive = zc->zc_cookie; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); if (zc->zc_nvlist_src != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvprops)) != 0) return (error); error = zfs_check_userprops(zc->zc_name, nvprops); if (error) goto out; if (!nvlist_empty(nvprops) && zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { error = ENOTSUP; goto out; } error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, nvprops, recursive, B_FALSE, -1); out: nvlist_free(nvprops); return (error); } /* * inputs: * name dataset name, or when 'arg == NULL' the full snapshot name * arg short snapshot name (i.e. part after the '@') */ int zfs_unmount_snap(const char *name, void *arg) { zfs_sb_t *zsb = NULL; char *dsname; char *snapname; char *fullname; char *ptr; int error; if (arg) { dsname = strdup(name); snapname = strdup(arg); } else { ptr = strchr(name, '@'); if (ptr) { dsname = strdup(name); dsname[ptr - name] = '\0'; snapname = strdup(ptr + 1); } else { return (0); } } fullname = kmem_asprintf("%s@%s", dsname, snapname); error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); if (error == 0) { error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); zfs_sb_rele(zsb, FTAG); /* Allow ENOENT for consistency with upstream */ if (error == ENOENT) error = 0; } strfree(dsname); strfree(snapname); strfree(fullname); return (error); } /* * inputs: - * zc_name name of filesystem - * zc_value short name of snapshot + * zc_name name of filesystem, snaps must be under it + * zc_nvlist_src[_size] full names of snapshots to destroy * zc_defer_destroy mark for deferred destroy * - * outputs: none + * outputs: + * zc_name on failure, name of failed snapshot */ static int -zfs_ioc_destroy_snaps(zfs_cmd_t *zc) +zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) { - int err; + int err, len; + nvlist_t *nvl; + nvpair_t *pair; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) - return (EINVAL); - err = dmu_objset_find(zc->zc_name, - zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN); - if (err) + if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &nvl)) != 0) return (err); - return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value, - zc->zc_defer_destroy)); + + len = strlen(zc->zc_name); + for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) { + const char *name = nvpair_name(pair); + /* + * The snap name must be underneath the zc_name. This ensures + * that our permission checks were legitimate. + */ + if (strncmp(zc->zc_name, name, len) != 0 || + (name[len] != '@' && name[len] != '/')) { + nvlist_free(nvl); + return (EINVAL); + } + + (void) zfs_unmount_snap(name, NULL); + } + + err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, + zc->zc_name); + nvlist_free(nvl); + return (err); } /* * inputs: * zc_name name of dataset to destroy * zc_objset_type type of objset * zc_defer_destroy mark for deferred destroy * * outputs: none */ static int zfs_ioc_destroy(zfs_cmd_t *zc) { int err; if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { err = zfs_unmount_snap(zc->zc_name, NULL); if (err) return (err); } err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy); if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0) (void) zvol_remove_minor(zc->zc_name); return (err); } /* * inputs: * zc_name name of dataset to rollback (to most recent snapshot) * * outputs: none */ static int zfs_ioc_rollback(zfs_cmd_t *zc) { dsl_dataset_t *ds, *clone; int error; zfs_sb_t *zsb; char *clone_name; error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); if (error) return (error); /* must not be a snapshot */ if (dsl_dataset_is_snapshot(ds)) { dsl_dataset_rele(ds, FTAG); return (EINVAL); } /* must have a most recent snapshot */ if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { dsl_dataset_rele(ds, FTAG); return (EINVAL); } /* * Create clone of most recent snapshot. */ clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name); error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT); if (error) goto out; error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone); if (error) goto out; /* * Do clone swap. */ if (get_zfs_sb(zc->zc_name, &zsb) == 0) { error = zfs_suspend_fs(zsb); if (error == 0) { int resume_err; if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { error = dsl_dataset_clone_swap(clone, ds, B_TRUE); dsl_dataset_disown(ds, FTAG); ds = NULL; } else { error = EBUSY; } resume_err = zfs_resume_fs(zsb, zc->zc_name); error = error ? error : resume_err; } deactivate_super(zsb->z_sb); } else { if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { error = dsl_dataset_clone_swap(clone, ds, B_TRUE); dsl_dataset_disown(ds, FTAG); ds = NULL; } else { error = EBUSY; } } /* * Destroy clone (which also closes it). */ (void) dsl_dataset_destroy(clone, FTAG, B_FALSE); out: strfree(clone_name); if (ds) dsl_dataset_rele(ds, FTAG); return (error); } /* * inputs: * zc_name old name of dataset * zc_value new name of dataset * zc_cookie recursive flag (only valid for snapshots) * * outputs: none */ static int zfs_ioc_rename(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie & 1; int err; zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '%')) return (EINVAL); /* * Unmount snapshot unless we're doing a recursive rename, * in which case the dataset code figures out which snapshots * to unmount. */ if (!recursive && strchr(zc->zc_name, '@') != NULL && zc->zc_objset_type == DMU_OST_ZFS) { err = zfs_unmount_snap(zc->zc_name, NULL); if (err) return (err); } err = dmu_objset_rename(zc->zc_name, zc->zc_value, recursive); if ((err == 0) && (zc->zc_objset_type == DMU_OST_ZVOL)) { (void) zvol_remove_minor(zc->zc_name); (void) zvol_create_minor(zc->zc_value); } return (err); } static int zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) { const char *propname = nvpair_name(pair); boolean_t issnap = (strchr(dsname, '@') != NULL); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; int err; if (prop == ZPROP_INVAL) { if (zfs_prop_user(propname)) { if ((err = zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_USERPROP, cr))) return (err); return (0); } if (!issnap && zfs_prop_userquota(propname)) { const char *perm = NULL; const char *uq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA]; const char *gq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA]; if (strncmp(propname, uq_prefix, strlen(uq_prefix)) == 0) { perm = ZFS_DELEG_PERM_USERQUOTA; } else if (strncmp(propname, gq_prefix, strlen(gq_prefix)) == 0) { perm = ZFS_DELEG_PERM_GROUPQUOTA; } else { /* USERUSED and GROUPUSED are read-only */ return (EINVAL); } if ((err = zfs_secpolicy_write_perms(dsname, perm, cr))) return (err); return (0); } return (EINVAL); } if (issnap) return (EINVAL); if (nvpair_type(pair) == DATA_TYPE_NVLIST) { /* * dsl_prop_get_all_impl() returns properties in this * format. */ nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) == 0); } /* * Check that this value is valid for this pool version */ switch (prop) { case ZFS_PROP_COMPRESSION: /* * If the user specified gzip compression, make sure * the SPA supports it. We ignore any errors here since * we'll catch them later. */ if (nvpair_type(pair) == DATA_TYPE_UINT64 && nvpair_value_uint64(pair, &intval) == 0) { if (intval >= ZIO_COMPRESS_GZIP_1 && intval <= ZIO_COMPRESS_GZIP_9 && zfs_earlier_version(dsname, SPA_VERSION_GZIP_COMPRESSION)) { return (ENOTSUP); } if (intval == ZIO_COMPRESS_ZLE && zfs_earlier_version(dsname, SPA_VERSION_ZLE_COMPRESSION)) return (ENOTSUP); /* * If this is a bootable dataset then * verify that the compression algorithm * is supported for booting. We must return * something other than ENOTSUP since it * implies a downrev pool version. */ if (zfs_is_bootfs(dsname) && !BOOTFS_COMPRESS_VALID(intval)) { return (ERANGE); } } break; case ZFS_PROP_COPIES: if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS)) return (ENOTSUP); break; case ZFS_PROP_DEDUP: if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) return (ENOTSUP); break; case ZFS_PROP_SHARESMB: if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) return (ENOTSUP); break; case ZFS_PROP_ACLINHERIT: if (nvpair_type(pair) == DATA_TYPE_UINT64 && nvpair_value_uint64(pair, &intval) == 0) { if (intval == ZFS_ACL_PASSTHROUGH_X && zfs_earlier_version(dsname, SPA_VERSION_PASSTHROUGH_X)) return (ENOTSUP); } break; default: break; } return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); } /* * Removes properties from the given props list that fail permission checks * needed to clear them and to restore them in case of a receive error. For each * property, make sure we have both set and inherit permissions. * * Returns the first error encountered if any permission checks fail. If the * caller provides a non-NULL errlist, it also gives the complete list of names * of all the properties that failed a permission check along with the * corresponding error numbers. The caller is responsible for freeing the * returned errlist. * * If every property checks out successfully, zero is returned and the list * pointed at by errlist is NULL. */ static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) { zfs_cmd_t *zc; nvpair_t *pair, *next_pair; nvlist_t *errors; int err, rv = 0; if (props == NULL) return (0); VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); (void) strcpy(zc->zc_name, dataset); pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { next_pair = nvlist_next_nvpair(props, pair); (void) strcpy(zc->zc_value, nvpair_name(pair)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); } pair = next_pair; } kmem_free(zc, sizeof (zfs_cmd_t)); if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { nvlist_free(errors); errors = NULL; } else { VERIFY(nvpair_value_int32(pair, &rv) == 0); } if (errlist == NULL) nvlist_free(errors); else *errlist = errors; return (rv); } static boolean_t propval_equals(nvpair_t *p1, nvpair_t *p2) { if (nvpair_type(p1) == DATA_TYPE_NVLIST) { /* dsl_prop_get_all_impl() format */ nvlist_t *attrs; VERIFY(nvpair_value_nvlist(p1, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1) == 0); } if (nvpair_type(p2) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(p2, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2) == 0); } if (nvpair_type(p1) != nvpair_type(p2)) return (B_FALSE); if (nvpair_type(p1) == DATA_TYPE_STRING) { char *valstr1, *valstr2; VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0); VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0); return (strcmp(valstr1, valstr2) == 0); } else { uint64_t intval1, intval2; VERIFY(nvpair_value_uint64(p1, &intval1) == 0); VERIFY(nvpair_value_uint64(p2, &intval2) == 0); return (intval1 == intval2); } } /* * Remove properties from props if they are not going to change (as determined * by comparison with origprops). Remove them from origprops as well, since we * do not need to clear or restore properties that won't change. */ static void props_reduce(nvlist_t *props, nvlist_t *origprops) { nvpair_t *pair, *next_pair; if (origprops == NULL) return; /* all props need to be received */ pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { const char *propname = nvpair_name(pair); nvpair_t *match; next_pair = nvlist_next_nvpair(props, pair); if ((nvlist_lookup_nvpair(origprops, propname, &match) != 0) || !propval_equals(pair, match)) goto next; /* need to set received value */ /* don't clear the existing received value */ (void) nvlist_remove_nvpair(origprops, match); /* don't bother receiving the property */ (void) nvlist_remove_nvpair(props, pair); next: pair = next_pair; } } #ifdef DEBUG static boolean_t zfs_ioc_recv_inject_err; #endif /* * inputs: * zc_name name of containing filesystem * zc_nvlist_src{_size} nvlist of properties to apply * zc_value name of snapshot to create * zc_string name of clone origin (if DRR_FLAG_CLONE) * zc_cookie file descriptor to recv from * zc_begin_record the BEGIN record of the stream (not byteswapped) * zc_guid force flag * zc_cleanup_fd cleanup-on-exit file descriptor * zc_action_handle handle for this guid/ds mapping (or zero on first call) * * outputs: * zc_cookie number of bytes read * zc_nvlist_dst{_size} error for each unapplied received property * zc_obj zprop_errflags_t * zc_action_handle handle for this guid/ds mapping */ static int zfs_ioc_recv(zfs_cmd_t *zc) { file_t *fp; objset_t *os; dmu_recv_cookie_t drc; boolean_t force = (boolean_t)zc->zc_guid; int fd; int error = 0; int props_error = 0; nvlist_t *errors; offset_t off; nvlist_t *props = NULL; /* sent properties */ nvlist_t *origprops = NULL; /* existing properties */ objset_t *origin = NULL; char *tosnap; char tofs[ZFS_MAXNAMELEN]; boolean_t first_recvd_props = B_FALSE; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '@') == NULL || strchr(zc->zc_value, '%')) return (EINVAL); (void) strcpy(tofs, zc->zc_value); tosnap = strchr(tofs, '@'); *tosnap++ = '\0'; if (zc->zc_nvlist_src != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props)) != 0) return (error); fd = zc->zc_cookie; fp = getf(fd); if (fp == NULL) { nvlist_free(props); return (EBADF); } VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) && !dsl_prop_get_hasrecvd(os)) { first_recvd_props = B_TRUE; } /* * If new received properties are supplied, they are to * completely replace the existing received properties, so stash * away the existing ones. */ if (dsl_prop_get_received(os, &origprops) == 0) { nvlist_t *errlist = NULL; /* * Don't bother writing a property if its value won't * change (and avoid the unnecessary security checks). * * The first receive after SPA_VERSION_RECVD_PROPS is a * special case where we blow away all local properties * regardless. */ if (!first_recvd_props) props_reduce(props, origprops); if (zfs_check_clearable(tofs, origprops, &errlist) != 0) (void) nvlist_merge(errors, errlist, 0); nvlist_free(errlist); } dmu_objset_rele(os, FTAG); } if (zc->zc_string[0]) { error = dmu_objset_hold(zc->zc_string, FTAG, &origin); if (error) goto out; } error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds, &zc->zc_begin_record, force, origin, &drc); if (origin) dmu_objset_rele(origin, FTAG); if (error) goto out; /* * Set properties before we receive the stream so that they are applied * to the new data. Note that we must call dmu_recv_stream() if * dmu_recv_begin() succeeds. */ if (props) { nvlist_t *errlist; if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { if (drc.drc_newfs) { if (spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) first_recvd_props = B_TRUE; } else if (origprops != NULL) { if (clear_received_props(os, tofs, origprops, first_recvd_props ? NULL : props) != 0) zc->zc_obj |= ZPROP_ERR_NOCLEAR; } else { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } dsl_prop_set_hasrecvd(os); } else if (!drc.drc_newfs) { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, props, &errlist); (void) nvlist_merge(errors, errlist, 0); nvlist_free(errlist); } if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. */ props_error = EINVAL; } off = fp->f_offset; error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd, &zc->zc_action_handle); if (error == 0) { zfs_sb_t *zsb = NULL; if (get_zfs_sb(tofs, &zsb) == 0) { /* online recv */ int end_err; error = zfs_suspend_fs(zsb); /* * If the suspend fails, then the recv_end will * likely also fail, and clean up after itself. */ end_err = dmu_recv_end(&drc); if (error == 0) error = zfs_resume_fs(zsb, tofs); error = error ? error : end_err; deactivate_super(zsb->z_sb); } else { error = dmu_recv_end(&drc); } } zc->zc_cookie = off - fp->f_offset; if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; #ifdef DEBUG if (zfs_ioc_recv_inject_err) { zfs_ioc_recv_inject_err = B_FALSE; error = 1; } #endif /* * On error, restore the original props. */ if (error && props) { if (dmu_objset_hold(tofs, FTAG, &os) == 0) { if (clear_received_props(os, tofs, props, NULL) != 0) { /* * We failed to clear the received properties. * Since we may have left a $recvd value on the * system, we can't clear the $hasrecvd flag. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } else if (first_recvd_props) { dsl_prop_unset_hasrecvd(os); } dmu_objset_rele(os, FTAG); } else if (!drc.drc_newfs) { /* We failed to clear the received properties. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } if (origprops == NULL && !drc.drc_newfs) { /* We failed to stash the original properties. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } /* * dsl_props_set() will not convert RECEIVED to LOCAL on or * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL * explictly if we're restoring local properties cleared in the * first new-style receive. */ if (origprops != NULL && zfs_set_prop_nvlist(tofs, (first_recvd_props ? ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED), origprops, NULL) != 0) { /* * We stashed the original properties but failed to * restore them. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } } out: nvlist_free(props); nvlist_free(origprops); nvlist_free(errors); releasef(fd); if (error == 0) error = props_error; return (error); } /* * inputs: * zc_name name of snapshot to send * zc_cookie file descriptor to send stream to * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) * zc_sendobj objsetid of snapshot to send * zc_fromobj objsetid of incremental fromsnap (may be zero) + * zc_guid if set, estimate size of stream only. zc_cookie is ignored. + * output size in zc_objset_type. * * outputs: none */ static int zfs_ioc_send(zfs_cmd_t *zc) { objset_t *fromsnap = NULL; objset_t *tosnap; - file_t *fp; int error; offset_t off; dsl_dataset_t *ds; dsl_dataset_t *dsfrom = NULL; spa_t *spa; dsl_pool_t *dp; + boolean_t estimate = (zc->zc_guid != 0); error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); if (error) { spa_close(spa, FTAG); return (error); } error = dmu_objset_from_ds(ds, &tosnap); if (error) { dsl_dataset_rele(ds, FTAG); spa_close(spa, FTAG); return (error); } if (zc->zc_fromobj != 0) { rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); if (error) { dsl_dataset_rele(ds, FTAG); return (error); } error = dmu_objset_from_ds(dsfrom, &fromsnap); if (error) { dsl_dataset_rele(dsfrom, FTAG); dsl_dataset_rele(ds, FTAG); return (error); } } else { spa_close(spa, FTAG); } - fp = getf(zc->zc_cookie); - if (fp == NULL) { - dsl_dataset_rele(ds, FTAG); - if (dsfrom) - dsl_dataset_rele(dsfrom, FTAG); - return (EBADF); - } + if (estimate) { + error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, + &zc->zc_objset_type); + } else { + file_t *fp = getf(zc->zc_cookie); + if (fp == NULL) { + dsl_dataset_rele(ds, FTAG); + if (dsfrom) + dsl_dataset_rele(dsfrom, FTAG); + return (EBADF); + } - off = fp->f_offset; - error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off); + off = fp->f_offset; + error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, + fp->f_vnode, &off); - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) - fp->f_offset = off; - releasef(zc->zc_cookie); + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; + releasef(zc->zc_cookie); + } if (dsfrom) dsl_dataset_rele(dsfrom, FTAG); dsl_dataset_rele(ds, FTAG); return (error); } static int zfs_ioc_inject_fault(zfs_cmd_t *zc) { int id, error; error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id, &zc->zc_inject_record); if (error == 0) zc->zc_guid = (uint64_t)id; return (error); } static int zfs_ioc_clear_fault(zfs_cmd_t *zc) { return (zio_clear_fault((int)zc->zc_guid)); } static int zfs_ioc_inject_list_next(zfs_cmd_t *zc) { int id = (int)zc->zc_guid; int error; error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name), &zc->zc_inject_record); zc->zc_guid = id; return (error); } static int zfs_ioc_error_log(zfs_cmd_t *zc) { spa_t *spa; int error; size_t count = (size_t)zc->zc_nvlist_dst_size; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst, &count); if (error == 0) zc->zc_nvlist_dst_size = count; else zc->zc_nvlist_dst_size = spa_get_errlog_size(spa); spa_close(spa, FTAG); return (error); } static int zfs_ioc_clear(zfs_cmd_t *zc) { spa_t *spa; vdev_t *vd; int error; /* * On zpool clear we also fix up missing slogs */ mutex_enter(&spa_namespace_lock); spa = spa_lookup(zc->zc_name); if (spa == NULL) { mutex_exit(&spa_namespace_lock); return (EIO); } if (spa_get_log_state(spa) == SPA_LOG_MISSING) { /* we need to let spa_open/spa_load clear the chains */ spa_set_log_state(spa, SPA_LOG_CLEAR); } spa->spa_last_open_failed = 0; mutex_exit(&spa_namespace_lock); if (zc->zc_cookie & ZPOOL_NO_REWIND) { error = spa_open(zc->zc_name, &spa, FTAG); } else { nvlist_t *policy; nvlist_t *config = NULL; if (zc->zc_nvlist_src == 0) return (EINVAL); if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) { error = spa_open_rewind(zc->zc_name, &spa, FTAG, policy, &config); if (config != NULL) { int err; if ((err = put_nvlist(zc, config)) != 0) error = err; nvlist_free(config); } nvlist_free(policy); } } if (error) return (error); spa_vdev_state_enter(spa, SCL_NONE); if (zc->zc_guid == 0) { vd = NULL; } else { vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE); if (vd == NULL) { (void) spa_vdev_state_exit(spa, NULL, ENODEV); spa_close(spa, FTAG); return (ENODEV); } } vdev_clear(spa, vd); (void) spa_vdev_state_exit(spa, NULL, 0); /* * Resume any suspended I/Os. */ if (zio_resume(spa) != 0) error = EIO; spa_close(spa, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_value name of origin snapshot * * outputs: * zc_string name of conflicting snapshot, if there is one */ static int zfs_ioc_promote(zfs_cmd_t *zc) { char *cp; /* * We don't need to unmount *all* the origin fs's snapshots, but * it's easier. */ cp = strchr(zc->zc_value, '@'); if (cp) *cp = '\0'; (void) dmu_objset_find(zc->zc_value, zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS); return (dsl_dataset_promote(zc->zc_name, zc->zc_string)); } /* * Retrieve a single {user|group}{used|quota}@... property. * * inputs: * zc_name name of filesystem * zc_objset_type zfs_userquota_prop_t * zc_value domain name (eg. "S-1-234-567-89") * zc_guid RID/UID/GID * * outputs: * zc_cookie property value */ static int zfs_ioc_userspace_one(zfs_cmd_t *zc) { zfs_sb_t *zsb; int error; if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); if (error) return (error); error = zfs_userspace_one(zsb, zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie); zfs_sb_rele(zsb, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_objset_type zfs_userquota_prop_t * zc_nvlist_dst[_size] buffer to fill (not really an nvlist) * * outputs: * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t) * zc_cookie zap cursor */ static int zfs_ioc_userspace_many(zfs_cmd_t *zc) { zfs_sb_t *zsb; int bufsize = zc->zc_nvlist_dst_size; int error; void *buf; if (bufsize <= 0) return (ENOMEM); error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); if (error) return (error); buf = vmem_alloc(bufsize, KM_SLEEP); error = zfs_userspace_many(zsb, zc->zc_objset_type, &zc->zc_cookie, buf, &zc->zc_nvlist_dst_size); if (error == 0) { error = xcopyout(buf, (void *)(uintptr_t)zc->zc_nvlist_dst, zc->zc_nvlist_dst_size); } vmem_free(buf, bufsize); zfs_sb_rele(zsb, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * * outputs: * none */ static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) { objset_t *os; int error = 0; zfs_sb_t *zsb; if (get_zfs_sb(zc->zc_name, &zsb) == 0) { if (!dmu_objset_userused_enabled(zsb->z_os)) { /* * If userused is not enabled, it may be because the * objset needs to be closed & reopened (to grow the * objset_phys_t). Suspend/resume the fs will do that. */ error = zfs_suspend_fs(zsb); if (error == 0) error = zfs_resume_fs(zsb, zc->zc_name); } if (error == 0) error = dmu_objset_userspace_upgrade(zsb->z_os); deactivate_super(zsb->z_sb); } else { /* XXX kind of reading contents without owning */ error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error); error = dmu_objset_userspace_upgrade(os); dmu_objset_rele(os, FTAG); } return (error); } static int zfs_ioc_share(zfs_cmd_t *zc) { return (ENOSYS); } ace_t full_access[] = { {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0} }; /* * inputs: * zc_name name of containing filesystem * zc_obj object # beyond which we want next in-use object # * * outputs: * zc_obj next in-use object # */ static int zfs_ioc_next_obj(zfs_cmd_t *zc) { objset_t *os = NULL; int error; error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error); error = dmu_object_next(os, &zc->zc_obj, B_FALSE, os->os_dsl_dataset->ds_phys->ds_prev_snap_txg); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_value prefix name for snapshot * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) { char *snap_name; int error; snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); if (strlen(snap_name) >= MAXNAMELEN) { strfree(snap_name); return (E2BIG); } error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); if (error != 0) { strfree(snap_name); return (error); } (void) strcpy(zc->zc_value, snap_name); strfree(snap_name); return (0); } /* * inputs: * zc_name name of "to" snapshot * zc_value name of "from" snapshot * zc_cookie file descriptor to write diff data on * * outputs: * dmu_diff_record_t's to the file descriptor */ static int zfs_ioc_diff(zfs_cmd_t *zc) { objset_t *fromsnap; objset_t *tosnap; file_t *fp; offset_t off; int error; error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); if (error) return (error); error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap); if (error) { dmu_objset_rele(tosnap, FTAG); return (error); } fp = getf(zc->zc_cookie); if (fp == NULL) { dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); return (EBADF); } off = fp->f_offset; error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); return (error); } /* * Remove all ACL files in shares dir */ #ifdef HAVE_SMB_SHARE static int zfs_smb_acl_purge(znode_t *dzp) { zap_cursor_t zc; zap_attribute_t zap; zfs_sb_t *zsb = ZTOZSB(dzp); int error; for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id); (error = zap_cursor_retrieve(&zc, &zap)) == 0; zap_cursor_advance(&zc)) { if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred, NULL, 0)) != 0) break; } zap_cursor_fini(&zc); return (error); } #endif /* HAVE_SMB_SHARE */ static int zfs_ioc_smb_acl(zfs_cmd_t *zc) { #ifdef HAVE_SMB_SHARE vnode_t *vp; znode_t *dzp; vnode_t *resourcevp = NULL; znode_t *sharedir; zfs_sb_t *zsb; nvlist_t *nvlist; char *src, *target; vattr_t vattr; vsecattr_t vsec; int error = 0; if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp)) != 0) return (error); /* Now make sure mntpnt and dataset are ZFS */ if (vp->v_vfsp->vfs_fstype != zfsfstype || (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), zc->zc_name) != 0)) { VN_RELE(vp); return (EINVAL); } dzp = VTOZ(vp); zsb = ZTOZSB(dzp); ZFS_ENTER(zsb); /* * Create share dir if its missing. */ mutex_enter(&zsb->z_lock); if (zsb->z_shares_dir == 0) { dmu_tx_t *tx; tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE, ZFS_SHARES_DIR); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { error = zfs_create_share_dir(zsb, tx); dmu_tx_commit(tx); } if (error) { mutex_exit(&zsb->z_lock); VN_RELE(vp); ZFS_EXIT(zsb); return (error); } } mutex_exit(&zsb->z_lock); ASSERT(zsb->z_shares_dir); if ((error = zfs_zget(zsb, zsb->z_shares_dir, &sharedir)) != 0) { VN_RELE(vp); ZFS_EXIT(zsb); return (error); } switch (zc->zc_cookie) { case ZFS_SMB_ACL_ADD: vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; vattr.va_mode = S_IFREG|0777; vattr.va_uid = 0; vattr.va_gid = 0; vsec.vsa_mask = VSA_ACE; vsec.vsa_aclentp = &full_access; vsec.vsa_aclentsz = sizeof (full_access); vsec.vsa_aclcnt = 1; error = VOP_CREATE(ZTOV(sharedir), zc->zc_string, &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec); if (resourcevp) VN_RELE(resourcevp); break; case ZFS_SMB_ACL_REMOVE: error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred, NULL, 0); break; case ZFS_SMB_ACL_RENAME: if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) { VN_RELE(vp); ZFS_EXIT(zsb); return (error); } if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) || nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET, &target)) { VN_RELE(vp); VN_RELE(ZTOV(sharedir)); ZFS_EXIT(zsb); nvlist_free(nvlist); return (error); } error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target, kcred, NULL, 0); nvlist_free(nvlist); break; case ZFS_SMB_ACL_PURGE: error = zfs_smb_acl_purge(sharedir); break; default: error = EINVAL; break; } VN_RELE(vp); VN_RELE(ZTOV(sharedir)); ZFS_EXIT(zsb); return (error); #else return (ENOTSUP); #endif /* HAVE_SMB_SHARE */ } /* * inputs: * zc_name name of filesystem * zc_value short name of snap * zc_string user-supplied tag for this hold * zc_cookie recursive flag * zc_temphold set if hold is temporary * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * zc_sendobj if non-zero, the objid for zc_name@zc_value * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg * * outputs: none */ static int zfs_ioc_hold(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie; spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; int error; minor_t minor = 0; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); if (zc->zc_sendobj == 0) { return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd)); } if (recursive) return (EINVAL); error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); if (error) return (error); /* * Until we have a hold on this snapshot, it's possible that * zc_sendobj could've been destroyed and reused as part * of a later txg. Make sure we're looking at the right object. */ if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) { dsl_dataset_rele(ds, FTAG); return (ENOENT); } if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) { error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); if (error) { dsl_dataset_rele(ds, FTAG); return (error); } } error = dsl_dataset_user_hold_for_send(ds, zc->zc_string, zc->zc_temphold); if (minor != 0) { if (error == 0) { dsl_register_onexit_hold_cleanup(ds, zc->zc_string, minor); } zfs_onexit_fd_rele(zc->zc_cleanup_fd); } dsl_dataset_rele(ds, FTAG); return (error); } /* * inputs: * zc_name name of dataset from which we're releasing a user hold * zc_value short name of snap * zc_string user-supplied tag for this hold * zc_cookie recursive flag * * outputs: none */ static int zfs_ioc_release(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); return (dsl_dataset_user_release(zc->zc_name, zc->zc_value, zc->zc_string, recursive)); } /* * inputs: * zc_name name of filesystem * * outputs: * zc_nvlist_src{_size} nvlist of snapshot holds */ static int zfs_ioc_get_holds(zfs_cmd_t *zc) { nvlist_t *nvp; int error; if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) { error = put_nvlist(zc, nvp); nvlist_free(nvp); } return (error); } /* * inputs: * zc_guid flags (ZEVENT_NONBLOCK) * * outputs: * zc_nvlist_dst next nvlist event * zc_cookie dropped events since last get * zc_cleanup_fd cleanup-on-exit file descriptor */ static int zfs_ioc_events_next(zfs_cmd_t *zc) { zfs_zevent_t *ze; nvlist_t *event = NULL; minor_t minor; uint64_t dropped = 0; int error; error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze); if (error != 0) return (error); do { error = zfs_zevent_next(ze, &event, &zc->zc_nvlist_dst_size, &dropped); if (event != NULL) { zc->zc_cookie = dropped; error = put_nvlist(zc, event); nvlist_free(event); } if (zc->zc_guid & ZEVENT_NONBLOCK) break; if ((error == 0) || (error != ENOENT)) break; error = zfs_zevent_wait(ze); if (error) break; } while (1); zfs_zevent_fd_rele(zc->zc_cleanup_fd); return (error); } /* * outputs: * zc_cookie cleared events count */ static int zfs_ioc_events_clear(zfs_cmd_t *zc) { int count; zfs_zevent_drain_all(&count); zc->zc_cookie = count; return 0; } +/* + * inputs: + * zc_name name of new filesystem or snapshot + * zc_value full name of old snapshot + * + * outputs: + * zc_cookie space in bytes + * zc_objset_type compressed space in bytes + * zc_perm_action uncompressed space in bytes + */ +static int +zfs_ioc_space_written(zfs_cmd_t *zc) +{ + int error; + dsl_dataset_t *new, *old; + + error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + if (error != 0) + return (error); + error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + if (error != 0) { + dsl_dataset_rele(new, FTAG); + return (error); + } + + error = dsl_dataset_space_written(old, new, &zc->zc_cookie, + &zc->zc_objset_type, &zc->zc_perm_action); + dsl_dataset_rele(old, FTAG); + dsl_dataset_rele(new, FTAG); + return (error); +} + +/* + * inputs: + * zc_name full name of last snapshot + * zc_value full name of first snapshot + * + * outputs: + * zc_cookie space in bytes + * zc_objset_type compressed space in bytes + * zc_perm_action uncompressed space in bytes + */ +static int +zfs_ioc_space_snaps(zfs_cmd_t *zc) +{ + int error; + dsl_dataset_t *new, *old; + + error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + if (error != 0) + return (error); + error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + if (error != 0) { + dsl_dataset_rele(new, FTAG); + return (error); + } + + error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, + &zc->zc_objset_type, &zc->zc_perm_action); + dsl_dataset_rele(old, FTAG); + dsl_dataset_rele(new, FTAG); + return (error); +} + /* * pool create, destroy, and export don't log the history as part of * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export * do the logging of those commands. */ static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_NONE }, { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY }, { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_create_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_remove_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, + { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_NONE }, { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME, - B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, + DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE } + POOL_CHECK_NONE }, + { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, }; int pool_status_check(const char *name, zfs_ioc_namecheck_t type, zfs_ioc_poolcheck_t check) { spa_t *spa; int error; ASSERT(type == POOL_NAME || type == DATASET_NAME); if (check & POOL_CHECK_NONE) return (0); error = spa_open(name, &spa, FTAG); if (error == 0) { if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa)) error = EAGAIN; else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa)) error = EROFS; spa_close(spa, FTAG); } return (error); } static void * zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which) { zfsdev_state_t *zs; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); for (zs = list_head(&zfsdev_state_list); zs != NULL; zs = list_next(&zfsdev_state_list, zs)) { if (zs->zs_minor == minor) { switch (which) { case ZST_ONEXIT: return (zs->zs_onexit); case ZST_ZEVENT: return (zs->zs_zevent); case ZST_ALL: return (zs); } } } return NULL; } void * zfsdev_get_state(minor_t minor, enum zfsdev_state_type which) { void *ptr; mutex_enter(&zfsdev_state_lock); ptr = zfsdev_get_state_impl(minor, which); mutex_exit(&zfsdev_state_lock); return ptr; } minor_t zfsdev_getminor(struct file *filp) { ASSERT(filp != NULL); ASSERT(filp->private_data != NULL); return (((zfsdev_state_t *)filp->private_data)->zs_minor); } /* * Find a free minor number. The zfsdev_state_list is expected to * be short since it is only a list of currently open file handles. */ minor_t zfsdev_minor_alloc(void) { static minor_t last_minor = 0; minor_t m; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); for (m = last_minor + 1; m != last_minor; m++) { if (m > ZFSDEV_MAX_MINOR) m = 1; if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) { last_minor = m; return (m); } } return (0); } static int zfsdev_state_init(struct file *filp) { zfsdev_state_t *zs; minor_t minor; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); minor = zfsdev_minor_alloc(); if (minor == 0) return (ENXIO); zs = kmem_zalloc( sizeof(zfsdev_state_t), KM_SLEEP); if (zs == NULL) return (ENOMEM); zs->zs_file = filp; zs->zs_minor = minor; filp->private_data = zs; zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); list_insert_tail(&zfsdev_state_list, zs); return (0); } static int zfsdev_state_destroy(struct file *filp) { zfsdev_state_t *zs; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); ASSERT(filp->private_data != NULL); zs = filp->private_data; zfs_onexit_destroy(zs->zs_onexit); zfs_zevent_destroy(zs->zs_zevent); list_remove(&zfsdev_state_list, zs); kmem_free(zs, sizeof(zfsdev_state_t)); return 0; } static int zfsdev_open(struct inode *ino, struct file *filp) { int error; mutex_enter(&zfsdev_state_lock); error = zfsdev_state_init(filp); mutex_exit(&zfsdev_state_lock); return (-error); } static int zfsdev_release(struct inode *ino, struct file *filp) { int error; mutex_enter(&zfsdev_state_lock); error = zfsdev_state_destroy(filp); mutex_exit(&zfsdev_state_lock); return (-error); } static long zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { zfs_cmd_t *zc; uint_t vec; int error, rc, flag = 0; vec = cmd - ZFS_IOC; if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (-EINVAL); zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); if (error != 0) error = EFAULT; if ((error == 0) && !(flag & FKIOCTL)) error = zfs_ioc_vec[vec].zvec_secpolicy(zc, CRED()); /* * Ensure that all pool/dataset names are valid before we pass down to * the lower layers. */ if (error == 0) { zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; zc->zc_iflags = flag & FKIOCTL; switch (zfs_ioc_vec[vec].zvec_namecheck) { case POOL_NAME: if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) error = EINVAL; error = pool_status_check(zc->zc_name, zfs_ioc_vec[vec].zvec_namecheck, zfs_ioc_vec[vec].zvec_pool_check); break; case DATASET_NAME: if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) error = EINVAL; error = pool_status_check(zc->zc_name, zfs_ioc_vec[vec].zvec_namecheck, zfs_ioc_vec[vec].zvec_pool_check); break; case NO_NAME: break; } } if (error == 0) error = zfs_ioc_vec[vec].zvec_func(zc); rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); if (error == 0) { if (rc != 0) error = EFAULT; if (zfs_ioc_vec[vec].zvec_his_log) zfs_log_history(zc); } kmem_free(zc, sizeof (zfs_cmd_t)); return (-error); } #ifdef CONFIG_COMPAT static long zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { return zfsdev_ioctl(filp, cmd, arg); } #else #define zfsdev_compat_ioctl NULL #endif static const struct file_operations zfsdev_fops = { .open = zfsdev_open, .release = zfsdev_release, .unlocked_ioctl = zfsdev_ioctl, .compat_ioctl = zfsdev_compat_ioctl, .owner = THIS_MODULE, }; static struct miscdevice zfs_misc = { .minor = MISC_DYNAMIC_MINOR, .name = ZFS_DRIVER, .fops = &zfsdev_fops, }; static int zfs_attach(void) { int error; mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsdev_state_list, sizeof (zfsdev_state_t), offsetof(zfsdev_state_t, zs_next)); error = misc_register(&zfs_misc); if (error) { printk(KERN_INFO "ZFS: misc_register() failed %d\n", error); return (error); } return (0); } static void zfs_detach(void) { int error; error = misc_deregister(&zfs_misc); if (error) printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error); mutex_destroy(&zfsdev_state_lock); list_destroy(&zfsdev_state_list); } uint_t zfs_fsyncer_key; extern uint_t rrw_tsd_key; #ifdef DEBUG #define ZFS_DEBUG_STR " (DEBUG mode)" #else #define ZFS_DEBUG_STR "" #endif int _init(void) { int error; spa_init(FREAD | FWRITE); zfs_init(); if ((error = zvol_init()) != 0) goto out1; if ((error = zfs_attach()) != 0) goto out2; tsd_create(&zfs_fsyncer_key, NULL); tsd_create(&rrw_tsd_key, NULL); printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, " "ZFS pool version %s, ZFS filesystem version %s\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR, SPA_VERSION_STRING, ZPL_VERSION_STRING); return (0); out2: (void) zvol_fini(); out1: zfs_fini(); spa_fini(); printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s" ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR, error); return (error); } int _fini(void) { zfs_detach(); zvol_fini(); zfs_fini(); spa_fini(); tsd_destroy(&zfs_fsyncer_key); tsd_destroy(&rrw_tsd_key); printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR); return (0); } #ifdef HAVE_SPL spl_module_init(_init); spl_module_exit(_fini); MODULE_DESCRIPTION("ZFS"); MODULE_AUTHOR(ZFS_META_AUTHOR); MODULE_LICENSE(ZFS_META_LICENSE); #endif /* HAVE_SPL */