diff --git a/sys/contrib/openzfs/cmd/arcstat.in b/sys/contrib/openzfs/cmd/arcstat.in index b1b6655f7f26..628a6bcef772 100755 --- a/sys/contrib/openzfs/cmd/arcstat.in +++ b/sys/contrib/openzfs/cmd/arcstat.in @@ -1,554 +1,554 @@ #!/usr/bin/env @PYTHON_SHEBANG@ # # Print out ZFS ARC Statistics exported via kstat(1) # For a definition of fields, or usage, use arcstat -v # # This script was originally a fork of the original arcstat.pl (0.1) # by Neelakanth Nadgir, originally published on his Sun blog on # 09/18/2007 # http://blogs.sun.com/realneel/entry/zfs_arc_statistics # # A new version aimed to improve upon the original by adding features # and fixing bugs as needed. This version was maintained by Mike # Harsch and was hosted in a public open source repository: # http://github.com/mharsch/arcstat # # but has since moved to the illumos-gate repository. # # This Python port was written by John Hixson for FreeNAS, introduced # in commit e2c29f: # https://github.com/freenas/freenas # # and has been improved by many people since. # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License, Version 1.0 only # (the "License"). You may not use this file except in compliance # with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or https://opensource.org/licenses/CDDL-1.0. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Fields have a fixed width. Every interval, we fill the "v" # hash with its corresponding value (v[field]=value) using calculate(). # @hdr is the array of fields that needs to be printed, so we # just iterate over this array and print the values using our pretty printer. # # This script must remain compatible with Python 3.6+. # import sys import time import getopt import re import copy from signal import signal, SIGINT, SIGWINCH, SIG_DFL cols = { # HDR: [Size, Scale, Description] "time": [8, -1, "Time"], "hits": [4, 1000, "ARC reads per second"], "miss": [4, 1000, "ARC misses per second"], "read": [4, 1000, "Total ARC accesses per second"], "hit%": [4, 100, "ARC hit percentage"], "miss%": [5, 100, "ARC miss percentage"], "dhit": [4, 1000, "Demand hits per second"], "dmis": [4, 1000, "Demand misses per second"], "dh%": [3, 100, "Demand hit percentage"], "dm%": [3, 100, "Demand miss percentage"], "phit": [4, 1000, "Prefetch hits per second"], "pmis": [4, 1000, "Prefetch misses per second"], "ph%": [3, 100, "Prefetch hits percentage"], "pm%": [3, 100, "Prefetch miss percentage"], "mhit": [4, 1000, "Metadata hits per second"], "mmis": [4, 1000, "Metadata misses per second"], "mread": [5, 1000, "Metadata accesses per second"], "mh%": [3, 100, "Metadata hit percentage"], "mm%": [3, 100, "Metadata miss percentage"], "arcsz": [5, 1024, "ARC size"], "size": [4, 1024, "ARC size"], "c": [4, 1024, "ARC target size"], "mfu": [4, 1000, "MFU list hits per second"], "mru": [4, 1000, "MRU list hits per second"], "mfug": [4, 1000, "MFU ghost list hits per second"], "mrug": [4, 1000, "MRU ghost list hits per second"], "eskip": [5, 1000, "evict_skip per second"], "el2skip": [7, 1000, "evict skip, due to l2 writes, per second"], "el2cach": [7, 1024, "Size of L2 cached evictions per second"], "el2el": [5, 1024, "Size of L2 eligible evictions per second"], "el2mfu": [6, 1024, "Size of L2 eligible MFU evictions per second"], "el2mru": [6, 1024, "Size of L2 eligible MRU evictions per second"], "el2inel": [7, 1024, "Size of L2 ineligible evictions per second"], "mtxmis": [6, 1000, "mutex_miss per second"], "dread": [5, 1000, "Demand accesses per second"], "pread": [5, 1000, "Prefetch accesses per second"], "l2hits": [6, 1000, "L2ARC hits per second"], "l2miss": [6, 1000, "L2ARC misses per second"], "l2read": [6, 1000, "Total L2ARC accesses per second"], "l2hit%": [6, 100, "L2ARC access hit percentage"], "l2miss%": [7, 100, "L2ARC access miss percentage"], "l2pref": [6, 1024, "L2ARC prefetch allocated size"], "l2mfu": [5, 1024, "L2ARC MFU allocated size"], "l2mru": [5, 1024, "L2ARC MRU allocated size"], "l2data": [6, 1024, "L2ARC data allocated size"], "l2meta": [6, 1024, "L2ARC metadata allocated size"], "l2pref%": [7, 100, "L2ARC prefetch percentage"], "l2mfu%": [6, 100, "L2ARC MFU percentage"], "l2mru%": [6, 100, "L2ARC MRU percentage"], "l2data%": [7, 100, "L2ARC data percentage"], "l2meta%": [7, 100, "L2ARC metadata percentage"], "l2asize": [7, 1024, "Actual (compressed) size of the L2ARC"], "l2size": [6, 1024, "Size of the L2ARC"], "l2bytes": [7, 1024, "Bytes read per second from the L2ARC"], "grow": [4, 1000, "ARC grow disabled"], "need": [4, 1024, "ARC reclaim need"], "free": [4, 1024, "ARC free memory"], "avail": [5, 1024, "ARC available memory"], "waste": [5, 1024, "Wasted memory due to round up to pagesize"], } v = {} hdr = ["time", "read", "miss", "miss%", "dmis", "dm%", "pmis", "pm%", "mmis", "mm%", "size", "c", "avail"] xhdr = ["time", "mfu", "mru", "mfug", "mrug", "eskip", "mtxmis", "dread", "pread", "read"] sint = 1 # Default interval is 1 second count = 1 # Default count is 1 hdr_intr = 20 # Print header every 20 lines of output opfile = None sep = " " # Default separator is 2 spaces version = "0.4" l2exist = False cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval " "[count]]\n") cur = {} d = {} out = None kstat = None pretty_print = True if sys.platform.startswith('freebsd'): # Requires py-sysctl on FreeBSD import sysctl def kstat_update(): global kstat k = [ctl for ctl in sysctl.filter('kstat.zfs.misc.arcstats') if ctl.type != sysctl.CTLTYPE_NODE] if not k: sys.exit(1) kstat = {} for s in k: if not s: continue name, value = s.name, s.value # Trims 'kstat.zfs.misc.arcstats' from the name kstat[name[24:]] = int(value) elif sys.platform.startswith('linux'): def kstat_update(): global kstat k = [line.strip() for line in open('/proc/spl/kstat/zfs/arcstats')] if not k: sys.exit(1) del k[0:2] kstat = {} for s in k: if not s: continue name, unused, value = s.split() kstat[name] = int(value) def detailed_usage(): sys.stderr.write("%s\n" % cmd) sys.stderr.write("Field definitions are as follows:\n") for key in cols: sys.stderr.write("%11s : %s\n" % (key, cols[key][2])) sys.stderr.write("\n") sys.exit(0) def usage(): sys.stderr.write("%s\n" % cmd) sys.stderr.write("\t -h : Print this help message\n") sys.stderr.write("\t -a : Print all possible stats\n") sys.stderr.write("\t -v : List all possible field headers and definitions" "\n") sys.stderr.write("\t -x : Print extended stats\n") sys.stderr.write("\t -f : Specify specific fields to print (see -v)\n") sys.stderr.write("\t -o : Redirect output to the specified file\n") sys.stderr.write("\t -s : Override default field separator with custom " "character or string\n") sys.stderr.write("\t -p : Disable auto-scaling of numerical fields\n") sys.stderr.write("\nExamples:\n") sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n") sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n") sys.stderr.write("\tarcstat -v\n") sys.stderr.write("\tarcstat -f time,hit%,dh%,ph%,mh% 1\n") sys.stderr.write("\n") sys.exit(1) def snap_stats(): global cur global kstat prev = copy.deepcopy(cur) kstat_update() cur = kstat for key in cur: if re.match(key, "class"): continue if key in prev: d[key] = cur[key] - prev[key] else: d[key] = cur[key] def prettynum(sz, scale, num=0): suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'] index = 0 save = 0 # Special case for date field if scale == -1: return "%s" % num # Rounding error, return 0 elif 0 < num < 1: num = 0 while abs(num) > scale and index < 5: save = num num = num / scale index += 1 if index == 0: return "%*d" % (sz, num) if abs(save / scale) < 10: return "%*.1f%s" % (sz - 1, num, suffix[index]) else: return "%*d%s" % (sz - 1, num, suffix[index]) def print_values(): global hdr global sep global v global pretty_print if pretty_print: fmt = lambda col: prettynum(cols[col][0], cols[col][1], v[col]) else: - fmt = lambda col: v[col] + fmt = lambda col: str(v[col]) sys.stdout.write(sep.join(fmt(col) for col in hdr)) sys.stdout.write("\n") sys.stdout.flush() def print_header(): global hdr global sep global pretty_print if pretty_print: fmt = lambda col: "%*s" % (cols[col][0], col) else: fmt = lambda col: col sys.stdout.write(sep.join(fmt(col) for col in hdr)) sys.stdout.write("\n") def get_terminal_lines(): try: import fcntl import termios import struct data = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ, '1234') sz = struct.unpack('hh', data) return sz[0] except Exception: pass def update_hdr_intr(): global hdr_intr lines = get_terminal_lines() if lines and lines > 3: hdr_intr = lines - 3 def resize_handler(signum, frame): update_hdr_intr() def init(): global sint global count global hdr global xhdr global opfile global sep global out global l2exist global pretty_print desired_cols = None aflag = False xflag = False hflag = False vflag = False i = 1 try: opts, args = getopt.getopt( sys.argv[1:], "axo:hvs:f:p", [ "all", "extended", "outfile", "help", "verbose", "separator", "columns", "parsable" ] ) except getopt.error as msg: sys.stderr.write("Error: %s\n" % str(msg)) usage() opts = None for opt, arg in opts: if opt in ('-a', '--all'): aflag = True if opt in ('-x', '--extended'): xflag = True if opt in ('-o', '--outfile'): opfile = arg i += 1 if opt in ('-h', '--help'): hflag = True if opt in ('-v', '--verbose'): vflag = True if opt in ('-s', '--separator'): sep = arg i += 1 if opt in ('-f', '--columns'): desired_cols = arg i += 1 if opt in ('-p', '--parsable'): pretty_print = False i += 1 argv = sys.argv[i:] sint = int(argv[0]) if argv else sint count = int(argv[1]) if len(argv) > 1 else (0 if len(argv) > 0 else 1) if hflag or (xflag and desired_cols): usage() if vflag: detailed_usage() if xflag: hdr = xhdr update_hdr_intr() # check if L2ARC exists snap_stats() l2_size = cur.get("l2_size") if l2_size: l2exist = True if desired_cols: hdr = desired_cols.split(",") invalid = [] incompat = [] for ele in hdr: if ele not in cols: invalid.append(ele) elif not l2exist and ele.startswith("l2"): sys.stdout.write("No L2ARC Here\n%s\n" % ele) incompat.append(ele) if len(invalid) > 0: sys.stderr.write("Invalid column definition! -- %s\n" % invalid) usage() if len(incompat) > 0: sys.stderr.write("Incompatible field specified! -- %s\n" % incompat) usage() if aflag: if l2exist: hdr = cols.keys() else: hdr = [col for col in cols.keys() if not col.startswith("l2")] if opfile: try: out = open(opfile, "w") sys.stdout = out except IOError: sys.stderr.write("Cannot open %s for writing\n" % opfile) sys.exit(1) def calculate(): global d global v global l2exist v = dict() v["time"] = time.strftime("%H:%M:%S", time.localtime()) v["hits"] = d["hits"] // sint v["miss"] = d["misses"] // sint v["read"] = v["hits"] + v["miss"] v["hit%"] = 100 * v["hits"] // v["read"] if v["read"] > 0 else 0 v["miss%"] = 100 - v["hit%"] if v["read"] > 0 else 0 v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) // sint v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) // sint v["dread"] = v["dhit"] + v["dmis"] v["dh%"] = 100 * v["dhit"] // v["dread"] if v["dread"] > 0 else 0 v["dm%"] = 100 - v["dh%"] if v["dread"] > 0 else 0 v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) // sint v["pmis"] = (d["prefetch_data_misses"] + d["prefetch_metadata_misses"]) // sint v["pread"] = v["phit"] + v["pmis"] v["ph%"] = 100 * v["phit"] // v["pread"] if v["pread"] > 0 else 0 v["pm%"] = 100 - v["ph%"] if v["pread"] > 0 else 0 v["mhit"] = (d["prefetch_metadata_hits"] + d["demand_metadata_hits"]) // sint v["mmis"] = (d["prefetch_metadata_misses"] + d["demand_metadata_misses"]) // sint v["mread"] = v["mhit"] + v["mmis"] v["mh%"] = 100 * v["mhit"] // v["mread"] if v["mread"] > 0 else 0 v["mm%"] = 100 - v["mh%"] if v["mread"] > 0 else 0 v["arcsz"] = cur["size"] v["size"] = cur["size"] v["c"] = cur["c"] v["mfu"] = d["mfu_hits"] // sint v["mru"] = d["mru_hits"] // sint v["mrug"] = d["mru_ghost_hits"] // sint v["mfug"] = d["mfu_ghost_hits"] // sint v["eskip"] = d["evict_skip"] // sint v["el2skip"] = d["evict_l2_skip"] // sint v["el2cach"] = d["evict_l2_cached"] // sint v["el2el"] = d["evict_l2_eligible"] // sint v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint v["el2mru"] = d["evict_l2_eligible_mru"] // sint v["el2inel"] = d["evict_l2_ineligible"] // sint v["mtxmis"] = d["mutex_miss"] // sint if l2exist: v["l2hits"] = d["l2_hits"] // sint v["l2miss"] = d["l2_misses"] // sint v["l2read"] = v["l2hits"] + v["l2miss"] v["l2hit%"] = 100 * v["l2hits"] // v["l2read"] if v["l2read"] > 0 else 0 v["l2miss%"] = 100 - v["l2hit%"] if v["l2read"] > 0 else 0 v["l2asize"] = cur["l2_asize"] v["l2size"] = cur["l2_size"] v["l2bytes"] = d["l2_read_bytes"] // sint v["l2pref"] = cur["l2_prefetch_asize"] v["l2mfu"] = cur["l2_mfu_asize"] v["l2mru"] = cur["l2_mru_asize"] v["l2data"] = cur["l2_bufc_data_asize"] v["l2meta"] = cur["l2_bufc_metadata_asize"] v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"] v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"] v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"] v["l2data%"] = 100 * v["l2data"] // v["l2asize"] v["l2meta%"] = 100 * v["l2meta"] // v["l2asize"] v["grow"] = 0 if cur["arc_no_grow"] else 1 v["need"] = cur["arc_need_free"] v["free"] = cur["memory_free_bytes"] v["avail"] = cur["memory_available_bytes"] v["waste"] = cur["abd_chunk_waste_size"] def main(): global sint global count global hdr_intr i = 0 count_flag = 0 init() if count > 0: count_flag = 1 signal(SIGINT, SIG_DFL) signal(SIGWINCH, resize_handler) while True: if i == 0: print_header() snap_stats() calculate() print_values() if count_flag == 1: if count <= 1: break count -= 1 i = 0 if i >= hdr_intr else i + 1 time.sleep(sint) if out: out.close() if __name__ == '__main__': main() diff --git a/sys/contrib/openzfs/cmd/zed/zed_conf.c b/sys/contrib/openzfs/cmd/zed/zed_conf.c index 59935102f123..9a39d1a80985 100644 --- a/sys/contrib/openzfs/cmd/zed/zed_conf.c +++ b/sys/contrib/openzfs/cmd/zed/zed_conf.c @@ -1,706 +1,720 @@ /* * This file is part of the ZFS Event Daemon (ZED). * * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. * Refer to the OpenZFS git commit log for authoritative copyright attribution. * * The contents of this file are subject to the terms of the * Common Development and Distribution License Version 1.0 (CDDL-1.0). * You can obtain a copy of the license from the top-level file * "OPENSOLARIS.LICENSE" or at . * You may not use this file except in compliance with the license. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zed.h" #include "zed_conf.h" #include "zed_file.h" #include "zed_log.h" #include "zed_strings.h" /* * Initialise the configuration with default values. */ void zed_conf_init(struct zed_conf *zcp) { memset(zcp, 0, sizeof (*zcp)); /* zcp->zfs_hdl opened in zed_event_init() */ /* zcp->zedlets created in zed_conf_scan_dir() */ zcp->pid_fd = -1; /* opened in zed_conf_write_pid() */ zcp->state_fd = -1; /* opened in zed_conf_open_state() */ zcp->zevent_fd = -1; /* opened in zed_event_init() */ zcp->max_jobs = 16; + zcp->max_zevent_buf_len = 1 << 20; if (!(zcp->pid_file = strdup(ZED_PID_FILE)) || !(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)) || !(zcp->state_file = strdup(ZED_STATE_FILE))) zed_log_die("Failed to create conf: %s", strerror(errno)); } /* * Destroy the configuration [zcp]. * * Note: zfs_hdl & zevent_fd are destroyed via zed_event_fini(). */ void zed_conf_destroy(struct zed_conf *zcp) { if (zcp->state_fd >= 0) { if (close(zcp->state_fd) < 0) zed_log_msg(LOG_WARNING, "Failed to close state file \"%s\": %s", zcp->state_file, strerror(errno)); zcp->state_fd = -1; } if (zcp->pid_file) { if ((unlink(zcp->pid_file) < 0) && (errno != ENOENT)) zed_log_msg(LOG_WARNING, "Failed to remove PID file \"%s\": %s", zcp->pid_file, strerror(errno)); } if (zcp->pid_fd >= 0) { if (close(zcp->pid_fd) < 0) zed_log_msg(LOG_WARNING, "Failed to close PID file \"%s\": %s", zcp->pid_file, strerror(errno)); zcp->pid_fd = -1; } if (zcp->pid_file) { free(zcp->pid_file); zcp->pid_file = NULL; } if (zcp->zedlet_dir) { free(zcp->zedlet_dir); zcp->zedlet_dir = NULL; } if (zcp->state_file) { free(zcp->state_file); zcp->state_file = NULL; } if (zcp->zedlets) { zed_strings_destroy(zcp->zedlets); zcp->zedlets = NULL; } } /* * Display command-line help and exit. * * If [got_err] is 0, output to stdout and exit normally; * otherwise, output to stderr and exit with a failure status. */ static void _zed_conf_display_help(const char *prog, boolean_t got_err) { struct opt { const char *o, *d, *v; }; FILE *fp = got_err ? stderr : stdout; struct opt *oo; struct opt iopts[] = { { .o = "-h", .d = "Display help" }, { .o = "-L", .d = "Display license information" }, { .o = "-V", .d = "Display version information" }, {}, }; struct opt nopts[] = { { .o = "-v", .d = "Be verbose" }, { .o = "-f", .d = "Force daemon to run" }, { .o = "-F", .d = "Run daemon in the foreground" }, { .o = "-I", .d = "Idle daemon until kernel module is (re)loaded" }, { .o = "-M", .d = "Lock all pages in memory" }, { .o = "-P", .d = "$PATH for ZED to use (only used by ZTS)" }, { .o = "-Z", .d = "Zero state file" }, {}, }; struct opt vopts[] = { { .o = "-d DIR", .d = "Read enabled ZEDLETs from DIR.", .v = ZED_ZEDLET_DIR }, { .o = "-p FILE", .d = "Write daemon's PID to FILE.", .v = ZED_PID_FILE }, { .o = "-s FILE", .d = "Write daemon's state to FILE.", .v = ZED_STATE_FILE }, { .o = "-j JOBS", .d = "Start at most JOBS at once.", .v = "16" }, + { .o = "-b LEN", .d = "Cap kernel event buffer at LEN entries.", + .v = "1048576" }, {}, }; fprintf(fp, "Usage: %s [OPTION]...\n", (prog ? prog : "zed")); fprintf(fp, "\n"); for (oo = iopts; oo->o; ++oo) fprintf(fp, " %*s %s\n", -8, oo->o, oo->d); fprintf(fp, "\n"); for (oo = nopts; oo->o; ++oo) fprintf(fp, " %*s %s\n", -8, oo->o, oo->d); fprintf(fp, "\n"); for (oo = vopts; oo->o; ++oo) fprintf(fp, " %*s %s [%s]\n", -8, oo->o, oo->d, oo->v); fprintf(fp, "\n"); exit(got_err ? EXIT_FAILURE : EXIT_SUCCESS); } /* * Display license information to stdout and exit. */ static void _zed_conf_display_license(void) { printf( "The ZFS Event Daemon (ZED) is distributed under the terms of the\n" " Common Development and Distribution License (CDDL-1.0)\n" " .\n" "\n" "Developed at Lawrence Livermore National Laboratory" " (LLNL-CODE-403049).\n" "\n"); exit(EXIT_SUCCESS); } /* * Display version information to stdout and exit. */ static void _zed_conf_display_version(void) { printf("%s-%s-%s\n", ZFS_META_NAME, ZFS_META_VERSION, ZFS_META_RELEASE); exit(EXIT_SUCCESS); } /* * Copy the [path] string to the [resultp] ptr. * If [path] is not an absolute path, prefix it with the current working dir. * If [resultp] is non-null, free its existing string before assignment. */ static void _zed_conf_parse_path(char **resultp, const char *path) { char buf[PATH_MAX]; assert(resultp != NULL); assert(path != NULL); if (*resultp) free(*resultp); if (path[0] == '/') { *resultp = strdup(path); } else { if (!getcwd(buf, sizeof (buf))) zed_log_die("Failed to get current working dir: %s", strerror(errno)); if (strlcat(buf, "/", sizeof (buf)) >= sizeof (buf) || strlcat(buf, path, sizeof (buf)) >= sizeof (buf)) zed_log_die("Failed to copy path: %s", strerror(ENAMETOOLONG)); *resultp = strdup(buf); } if (!*resultp) zed_log_die("Failed to copy path: %s", strerror(ENOMEM)); } /* * Parse the command-line options into the configuration [zcp]. */ void zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv) { - const char * const opts = ":hLVd:p:P:s:vfFMZIj:"; + const char * const opts = ":hLVd:p:P:s:vfFMZIj:b:"; int opt; unsigned long raw; if (!zcp || !argv || !argv[0]) zed_log_die("Failed to parse options: Internal error"); opterr = 0; /* suppress default getopt err msgs */ while ((opt = getopt(argc, argv, opts)) != -1) { switch (opt) { case 'h': _zed_conf_display_help(argv[0], B_FALSE); break; case 'L': _zed_conf_display_license(); break; case 'V': _zed_conf_display_version(); break; case 'd': _zed_conf_parse_path(&zcp->zedlet_dir, optarg); break; case 'I': zcp->do_idle = 1; break; case 'p': _zed_conf_parse_path(&zcp->pid_file, optarg); break; case 'P': _zed_conf_parse_path(&zcp->path, optarg); break; case 's': _zed_conf_parse_path(&zcp->state_file, optarg); break; case 'v': zcp->do_verbose = 1; break; case 'f': zcp->do_force = 1; break; case 'F': zcp->do_foreground = 1; break; case 'M': zcp->do_memlock = 1; break; case 'Z': zcp->do_zero = 1; break; case 'j': errno = 0; raw = strtoul(optarg, NULL, 0); if (errno == ERANGE || raw > INT16_MAX) { zed_log_die("%lu is too many jobs", raw); } if (raw == 0) { zed_log_die("0 jobs makes no sense"); } else { zcp->max_jobs = raw; } break; + case 'b': + errno = 0; + raw = strtoul(optarg, NULL, 0); + if (errno == ERANGE || raw > INT32_MAX) { + zed_log_die("%lu is too large", raw); + } if (raw == 0) { + zcp->max_zevent_buf_len = INT32_MAX; + } else { + zcp->max_zevent_buf_len = raw; + } + break; case '?': default: if (optopt == '?') _zed_conf_display_help(argv[0], B_FALSE); fprintf(stderr, "%s: Invalid option '-%c'\n\n", argv[0], optopt); _zed_conf_display_help(argv[0], B_TRUE); break; } } } /* * Scan the [zcp] zedlet_dir for files to exec based on the event class. * Files must be executable by user, but not writable by group or other. * Dotfiles are ignored. * * Return 0 on success with an updated set of zedlets, * or -1 on error with errno set. */ int zed_conf_scan_dir(struct zed_conf *zcp) { zed_strings_t *zedlets; DIR *dirp; struct dirent *direntp; char pathname[PATH_MAX]; struct stat st; int n; if (!zcp) { errno = EINVAL; zed_log_msg(LOG_ERR, "Failed to scan zedlet dir: %s", strerror(errno)); return (-1); } zedlets = zed_strings_create(); if (!zedlets) { errno = ENOMEM; zed_log_msg(LOG_WARNING, "Failed to scan dir \"%s\": %s", zcp->zedlet_dir, strerror(errno)); return (-1); } dirp = opendir(zcp->zedlet_dir); if (!dirp) { int errno_bak = errno; zed_log_msg(LOG_WARNING, "Failed to open dir \"%s\": %s", zcp->zedlet_dir, strerror(errno)); zed_strings_destroy(zedlets); errno = errno_bak; return (-1); } while ((direntp = readdir(dirp))) { if (direntp->d_name[0] == '.') continue; n = snprintf(pathname, sizeof (pathname), "%s/%s", zcp->zedlet_dir, direntp->d_name); if ((n < 0) || (n >= sizeof (pathname))) { zed_log_msg(LOG_WARNING, "Failed to stat \"%s\": %s", direntp->d_name, strerror(ENAMETOOLONG)); continue; } if (stat(pathname, &st) < 0) { zed_log_msg(LOG_WARNING, "Failed to stat \"%s\": %s", pathname, strerror(errno)); continue; } if (!S_ISREG(st.st_mode)) { zed_log_msg(LOG_INFO, "Ignoring \"%s\": not a regular file", direntp->d_name); continue; } if ((st.st_uid != 0) && !zcp->do_force) { zed_log_msg(LOG_NOTICE, "Ignoring \"%s\": not owned by root", direntp->d_name); continue; } if (!(st.st_mode & S_IXUSR)) { zed_log_msg(LOG_INFO, "Ignoring \"%s\": not executable by user", direntp->d_name); continue; } if ((st.st_mode & S_IWGRP) && !zcp->do_force) { zed_log_msg(LOG_NOTICE, "Ignoring \"%s\": writable by group", direntp->d_name); continue; } if ((st.st_mode & S_IWOTH) && !zcp->do_force) { zed_log_msg(LOG_NOTICE, "Ignoring \"%s\": writable by other", direntp->d_name); continue; } if (zed_strings_add(zedlets, NULL, direntp->d_name) < 0) { zed_log_msg(LOG_WARNING, "Failed to register \"%s\": %s", direntp->d_name, strerror(errno)); continue; } if (zcp->do_verbose) zed_log_msg(LOG_INFO, "Registered zedlet \"%s\"", direntp->d_name); } if (closedir(dirp) < 0) { int errno_bak = errno; zed_log_msg(LOG_WARNING, "Failed to close dir \"%s\": %s", zcp->zedlet_dir, strerror(errno)); zed_strings_destroy(zedlets); errno = errno_bak; return (-1); } if (zcp->zedlets) zed_strings_destroy(zcp->zedlets); zcp->zedlets = zedlets; return (0); } /* * Write the PID file specified in [zcp]. * Return 0 on success, -1 on error. * * This must be called after fork()ing to become a daemon (so the correct PID * is recorded), but before daemonization is complete and the parent process * exits (for synchronization with systemd). */ int zed_conf_write_pid(struct zed_conf *zcp) { char buf[PATH_MAX]; int n; char *p; mode_t mask; int rv; if (!zcp || !zcp->pid_file) { errno = EINVAL; zed_log_msg(LOG_ERR, "Failed to create PID file: %s", strerror(errno)); return (-1); } assert(zcp->pid_fd == -1); /* * Create PID file directory if needed. */ n = strlcpy(buf, zcp->pid_file, sizeof (buf)); if (n >= sizeof (buf)) { errno = ENAMETOOLONG; zed_log_msg(LOG_ERR, "Failed to create PID file: %s", strerror(errno)); goto err; } p = strrchr(buf, '/'); if (p) *p = '\0'; if ((mkdirp(buf, 0755) < 0) && (errno != EEXIST)) { zed_log_msg(LOG_ERR, "Failed to create directory \"%s\": %s", buf, strerror(errno)); goto err; } /* * Obtain PID file lock. */ mask = umask(0); umask(mask | 022); zcp->pid_fd = open(zcp->pid_file, O_RDWR | O_CREAT | O_CLOEXEC, 0644); umask(mask); if (zcp->pid_fd < 0) { zed_log_msg(LOG_ERR, "Failed to open PID file \"%s\": %s", zcp->pid_file, strerror(errno)); goto err; } rv = zed_file_lock(zcp->pid_fd); if (rv < 0) { zed_log_msg(LOG_ERR, "Failed to lock PID file \"%s\": %s", zcp->pid_file, strerror(errno)); goto err; } else if (rv > 0) { pid_t pid = zed_file_is_locked(zcp->pid_fd); if (pid < 0) { zed_log_msg(LOG_ERR, "Failed to test lock on PID file \"%s\"", zcp->pid_file); } else if (pid > 0) { zed_log_msg(LOG_ERR, "Found PID %d bound to PID file \"%s\"", pid, zcp->pid_file); } else { zed_log_msg(LOG_ERR, "Inconsistent lock state on PID file \"%s\"", zcp->pid_file); } goto err; } /* * Write PID file. */ n = snprintf(buf, sizeof (buf), "%d\n", (int)getpid()); if ((n < 0) || (n >= sizeof (buf))) { errno = ERANGE; zed_log_msg(LOG_ERR, "Failed to write PID file \"%s\": %s", zcp->pid_file, strerror(errno)); } else if (write(zcp->pid_fd, buf, n) != n) { zed_log_msg(LOG_ERR, "Failed to write PID file \"%s\": %s", zcp->pid_file, strerror(errno)); } else if (fdatasync(zcp->pid_fd) < 0) { zed_log_msg(LOG_ERR, "Failed to sync PID file \"%s\": %s", zcp->pid_file, strerror(errno)); } else { return (0); } err: if (zcp->pid_fd >= 0) { (void) close(zcp->pid_fd); zcp->pid_fd = -1; } return (-1); } /* * Open and lock the [zcp] state_file. * Return 0 on success, -1 on error. * * FIXME: Move state information into kernel. */ int zed_conf_open_state(struct zed_conf *zcp) { char dirbuf[PATH_MAX]; int n; char *p; int rv; if (!zcp || !zcp->state_file) { errno = EINVAL; zed_log_msg(LOG_ERR, "Failed to open state file: %s", strerror(errno)); return (-1); } n = strlcpy(dirbuf, zcp->state_file, sizeof (dirbuf)); if (n >= sizeof (dirbuf)) { errno = ENAMETOOLONG; zed_log_msg(LOG_WARNING, "Failed to open state file: %s", strerror(errno)); return (-1); } p = strrchr(dirbuf, '/'); if (p) *p = '\0'; if ((mkdirp(dirbuf, 0755) < 0) && (errno != EEXIST)) { zed_log_msg(LOG_WARNING, "Failed to create directory \"%s\": %s", dirbuf, strerror(errno)); return (-1); } if (zcp->state_fd >= 0) { if (close(zcp->state_fd) < 0) { zed_log_msg(LOG_WARNING, "Failed to close state file \"%s\": %s", zcp->state_file, strerror(errno)); return (-1); } } if (zcp->do_zero) (void) unlink(zcp->state_file); zcp->state_fd = open(zcp->state_file, O_RDWR | O_CREAT | O_CLOEXEC, 0644); if (zcp->state_fd < 0) { zed_log_msg(LOG_WARNING, "Failed to open state file \"%s\": %s", zcp->state_file, strerror(errno)); return (-1); } rv = zed_file_lock(zcp->state_fd); if (rv < 0) { zed_log_msg(LOG_WARNING, "Failed to lock state file \"%s\": %s", zcp->state_file, strerror(errno)); return (-1); } if (rv > 0) { pid_t pid = zed_file_is_locked(zcp->state_fd); if (pid < 0) { zed_log_msg(LOG_WARNING, "Failed to test lock on state file \"%s\"", zcp->state_file); } else if (pid > 0) { zed_log_msg(LOG_WARNING, "Found PID %d bound to state file \"%s\"", pid, zcp->state_file); } else { zed_log_msg(LOG_WARNING, "Inconsistent lock state on state file \"%s\"", zcp->state_file); } return (-1); } return (0); } /* * Read the opened [zcp] state_file to obtain the eid & etime of the last event * processed. Write the state from the last event to the [eidp] & [etime] args * passed by reference. Note that etime[] is an array of size 2. * Return 0 on success, -1 on error. */ int zed_conf_read_state(struct zed_conf *zcp, uint64_t *eidp, int64_t etime[]) { ssize_t len; struct iovec iov[3]; ssize_t n; if (!zcp || !eidp || !etime) { errno = EINVAL; zed_log_msg(LOG_ERR, "Failed to read state file: %s", strerror(errno)); return (-1); } if (lseek(zcp->state_fd, 0, SEEK_SET) == (off_t)-1) { zed_log_msg(LOG_WARNING, "Failed to reposition state file offset: %s", strerror(errno)); return (-1); } len = 0; iov[0].iov_base = eidp; len += iov[0].iov_len = sizeof (*eidp); iov[1].iov_base = &etime[0]; len += iov[1].iov_len = sizeof (etime[0]); iov[2].iov_base = &etime[1]; len += iov[2].iov_len = sizeof (etime[1]); n = readv(zcp->state_fd, iov, 3); if (n == 0) { *eidp = 0; } else if (n < 0) { zed_log_msg(LOG_WARNING, "Failed to read state file \"%s\": %s", zcp->state_file, strerror(errno)); return (-1); } else if (n != len) { errno = EIO; zed_log_msg(LOG_WARNING, "Failed to read state file \"%s\": Read %d of %d bytes", zcp->state_file, n, len); return (-1); } return (0); } /* * Write the [eid] & [etime] of the last processed event to the opened * [zcp] state_file. Note that etime[] is an array of size 2. * Return 0 on success, -1 on error. */ int zed_conf_write_state(struct zed_conf *zcp, uint64_t eid, int64_t etime[]) { ssize_t len; struct iovec iov[3]; ssize_t n; if (!zcp) { errno = EINVAL; zed_log_msg(LOG_ERR, "Failed to write state file: %s", strerror(errno)); return (-1); } if (lseek(zcp->state_fd, 0, SEEK_SET) == (off_t)-1) { zed_log_msg(LOG_WARNING, "Failed to reposition state file offset: %s", strerror(errno)); return (-1); } len = 0; iov[0].iov_base = &eid; len += iov[0].iov_len = sizeof (eid); iov[1].iov_base = &etime[0]; len += iov[1].iov_len = sizeof (etime[0]); iov[2].iov_base = &etime[1]; len += iov[2].iov_len = sizeof (etime[1]); n = writev(zcp->state_fd, iov, 3); if (n < 0) { zed_log_msg(LOG_WARNING, "Failed to write state file \"%s\": %s", zcp->state_file, strerror(errno)); return (-1); } if (n != len) { errno = EIO; zed_log_msg(LOG_WARNING, "Failed to write state file \"%s\": Wrote %d of %d bytes", zcp->state_file, n, len); return (-1); } if (fdatasync(zcp->state_fd) < 0) { zed_log_msg(LOG_WARNING, "Failed to sync state file \"%s\": %s", zcp->state_file, strerror(errno)); return (-1); } return (0); } diff --git a/sys/contrib/openzfs/cmd/zed/zed_conf.h b/sys/contrib/openzfs/cmd/zed/zed_conf.h index 0b30a1503c52..84825a05da6b 100644 --- a/sys/contrib/openzfs/cmd/zed/zed_conf.h +++ b/sys/contrib/openzfs/cmd/zed/zed_conf.h @@ -1,58 +1,59 @@ /* * This file is part of the ZFS Event Daemon (ZED). * * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. * Refer to the OpenZFS git commit log for authoritative copyright attribution. * * The contents of this file are subject to the terms of the * Common Development and Distribution License Version 1.0 (CDDL-1.0). * You can obtain a copy of the license from the top-level file * "OPENSOLARIS.LICENSE" or at . * You may not use this file except in compliance with the license. */ #ifndef ZED_CONF_H #define ZED_CONF_H #include #include #include "zed_strings.h" struct zed_conf { char *pid_file; /* abs path to pid file */ char *zedlet_dir; /* abs path to zedlet dir */ char *state_file; /* abs path to state file */ libzfs_handle_t *zfs_hdl; /* handle to libzfs */ zed_strings_t *zedlets; /* names of enabled zedlets */ char *path; /* custom $PATH for zedlets to use */ int pid_fd; /* fd to pid file for lock */ int state_fd; /* fd to state file */ int zevent_fd; /* fd for access to zevents */ int16_t max_jobs; /* max zedlets to run at one time */ + int32_t max_zevent_buf_len; /* max size of kernel event list */ boolean_t do_force:1; /* true if force enabled */ boolean_t do_foreground:1; /* true if run in foreground */ boolean_t do_memlock:1; /* true if locking memory */ boolean_t do_verbose:1; /* true if verbosity enabled */ boolean_t do_zero:1; /* true if zeroing state */ boolean_t do_idle:1; /* true if idle enabled */ }; void zed_conf_init(struct zed_conf *zcp); void zed_conf_destroy(struct zed_conf *zcp); void zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv); int zed_conf_scan_dir(struct zed_conf *zcp); int zed_conf_write_pid(struct zed_conf *zcp); int zed_conf_open_state(struct zed_conf *zcp); int zed_conf_read_state(struct zed_conf *zcp, uint64_t *eidp, int64_t etime[]); int zed_conf_write_state(struct zed_conf *zcp, uint64_t eid, int64_t etime[]); #endif /* !ZED_CONF_H */ diff --git a/sys/contrib/openzfs/cmd/zed/zed_event.c b/sys/contrib/openzfs/cmd/zed/zed_event.c index d5cafdaff061..079c6a043e5d 100644 --- a/sys/contrib/openzfs/cmd/zed/zed_event.c +++ b/sys/contrib/openzfs/cmd/zed/zed_event.c @@ -1,987 +1,998 @@ /* * This file is part of the ZFS Event Daemon (ZED). * * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. * Refer to the OpenZFS git commit log for authoritative copyright attribution. * * The contents of this file are subject to the terms of the * Common Development and Distribution License Version 1.0 (CDDL-1.0). * You can obtain a copy of the license from the top-level file * "OPENSOLARIS.LICENSE" or at . * You may not use this file except in compliance with the license. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zed.h" #include "zed_conf.h" #include "zed_disk_event.h" #include "zed_event.h" #include "zed_exec.h" #include "zed_file.h" #include "zed_log.h" #include "zed_strings.h" #include "agents/zfs_agents.h" #define MAXBUF 4096 +static int max_zevent_buf_len = 1 << 20; + /* * Open the libzfs interface. */ int zed_event_init(struct zed_conf *zcp) { if (!zcp) zed_log_die("Failed zed_event_init: %s", strerror(EINVAL)); zcp->zfs_hdl = libzfs_init(); if (!zcp->zfs_hdl) { if (zcp->do_idle) return (-1); zed_log_die("Failed to initialize libzfs"); } zcp->zevent_fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC); if (zcp->zevent_fd < 0) { if (zcp->do_idle) return (-1); zed_log_die("Failed to open \"%s\": %s", ZFS_DEV, strerror(errno)); } zfs_agent_init(zcp->zfs_hdl); if (zed_disk_event_init() != 0) { if (zcp->do_idle) return (-1); zed_log_die("Failed to initialize disk events"); } + if (zcp->max_zevent_buf_len != 0) + max_zevent_buf_len = zcp->max_zevent_buf_len; + return (0); } /* * Close the libzfs interface. */ void zed_event_fini(struct zed_conf *zcp) { if (!zcp) zed_log_die("Failed zed_event_fini: %s", strerror(EINVAL)); zed_disk_event_fini(); zfs_agent_fini(); if (zcp->zevent_fd >= 0) { if (close(zcp->zevent_fd) < 0) zed_log_msg(LOG_WARNING, "Failed to close \"%s\": %s", ZFS_DEV, strerror(errno)); zcp->zevent_fd = -1; } if (zcp->zfs_hdl) { libzfs_fini(zcp->zfs_hdl); zcp->zfs_hdl = NULL; } zed_exec_fini(); } static void _bump_event_queue_length(void) { int zzlm = -1, wr; char qlen_buf[12] = {0}; /* parameter is int => max "-2147483647\n" */ - long int qlen; + long int qlen, orig_qlen; zzlm = open("/sys/module/zfs/parameters/zfs_zevent_len_max", O_RDWR); if (zzlm < 0) goto done; if (read(zzlm, qlen_buf, sizeof (qlen_buf)) < 0) goto done; qlen_buf[sizeof (qlen_buf) - 1] = '\0'; errno = 0; - qlen = strtol(qlen_buf, NULL, 10); + orig_qlen = qlen = strtol(qlen_buf, NULL, 10); if (errno == ERANGE) goto done; if (qlen <= 0) qlen = 512; /* default zfs_zevent_len_max value */ else qlen *= 2; - if (qlen > INT_MAX) - qlen = INT_MAX; + /* + * Don't consume all of kernel memory with event logs if something + * goes wrong. + */ + if (qlen > max_zevent_buf_len) + qlen = max_zevent_buf_len; + if (qlen == orig_qlen) + goto done; wr = snprintf(qlen_buf, sizeof (qlen_buf), "%ld", qlen); if (pwrite(zzlm, qlen_buf, wr, 0) < 0) goto done; zed_log_msg(LOG_WARNING, "Bumping queue length to %ld", qlen); done: if (zzlm > -1) (void) close(zzlm); } /* * Seek to the event specified by [saved_eid] and [saved_etime]. * This protects against processing a given event more than once. * Return 0 upon a successful seek to the specified event, or -1 otherwise. * * A zevent is considered to be uniquely specified by its (eid,time) tuple. * The unsigned 64b eid is set to 1 when the kernel module is loaded, and * incremented by 1 for each new event. Since the state file can persist * across a kernel module reload, the time must be checked to ensure a match. */ int zed_event_seek(struct zed_conf *zcp, uint64_t saved_eid, int64_t saved_etime[]) { uint64_t eid; int found; nvlist_t *nvl; int n_dropped; int64_t *etime; uint_t nelem; int rv; if (!zcp) { errno = EINVAL; zed_log_msg(LOG_ERR, "Failed to seek zevent: %s", strerror(errno)); return (-1); } eid = 0; found = 0; while ((eid < saved_eid) && !found) { rv = zpool_events_next(zcp->zfs_hdl, &nvl, &n_dropped, ZEVENT_NONBLOCK, zcp->zevent_fd); if ((rv != 0) || !nvl) break; if (n_dropped > 0) { zed_log_msg(LOG_WARNING, "Missed %d events", n_dropped); _bump_event_queue_length(); } if (nvlist_lookup_uint64(nvl, "eid", &eid) != 0) { zed_log_msg(LOG_WARNING, "Failed to lookup zevent eid"); } else if (nvlist_lookup_int64_array(nvl, "time", &etime, &nelem) != 0) { zed_log_msg(LOG_WARNING, "Failed to lookup zevent time (eid=%llu)", eid); } else if (nelem != 2) { zed_log_msg(LOG_WARNING, "Failed to lookup zevent time (eid=%llu, nelem=%u)", eid, nelem); } else if ((eid != saved_eid) || (etime[0] != saved_etime[0]) || (etime[1] != saved_etime[1])) { /* no-op */ } else { found = 1; } free(nvl); } if (!found && (saved_eid > 0)) { if (zpool_events_seek(zcp->zfs_hdl, ZEVENT_SEEK_START, zcp->zevent_fd) < 0) zed_log_msg(LOG_WARNING, "Failed to seek to eid=0"); else eid = 0; } zed_log_msg(LOG_NOTICE, "Processing events since eid=%llu", eid); return (found ? 0 : -1); } /* * Return non-zero if nvpair [name] should be formatted in hex; o/w, return 0. */ static int _zed_event_value_is_hex(const char *name) { const char *hex_suffix[] = { "_guid", "_guids", NULL }; const char **pp; char *p; if (!name) return (0); for (pp = hex_suffix; *pp; pp++) { p = strstr(name, *pp); if (p && strlen(p) == strlen(*pp)) return (1); } return (0); } /* * Add an environment variable for [eid] to the container [zsp]. * * The variable name is the concatenation of [prefix] and [name] converted to * uppercase with non-alphanumeric characters converted to underscores; * [prefix] is optional, and [name] must begin with an alphabetic character. * If the converted variable name already exists within the container [zsp], * its existing value will be replaced with the new value. * * The variable value is specified by the format string [fmt]. * * Returns 0 on success, and -1 on error (with errno set). * * All environment variables in [zsp] should be added through this function. */ static __attribute__((format(printf, 5, 6))) int _zed_event_add_var(uint64_t eid, zed_strings_t *zsp, const char *prefix, const char *name, const char *fmt, ...) { char keybuf[MAXBUF]; char valbuf[MAXBUF]; char *dstp; const char *srcp; const char *lastp; int n; int buflen; va_list vargs; assert(zsp != NULL); assert(fmt != NULL); if (!name) { errno = EINVAL; zed_log_msg(LOG_WARNING, "Failed to add variable for eid=%llu: Name is empty", eid); return (-1); } else if (!isalpha(name[0])) { errno = EINVAL; zed_log_msg(LOG_WARNING, "Failed to add variable for eid=%llu: " "Name \"%s\" is invalid", eid, name); return (-1); } /* * Construct the string key by converting PREFIX (if present) and NAME. */ dstp = keybuf; lastp = keybuf + sizeof (keybuf); if (prefix) { for (srcp = prefix; *srcp && (dstp < lastp); srcp++) *dstp++ = isalnum(*srcp) ? toupper(*srcp) : '_'; } for (srcp = name; *srcp && (dstp < lastp); srcp++) *dstp++ = isalnum(*srcp) ? toupper(*srcp) : '_'; if (dstp == lastp) { errno = ENAMETOOLONG; zed_log_msg(LOG_WARNING, "Failed to add variable for eid=%llu: Name too long", eid); return (-1); } *dstp = '\0'; /* * Construct the string specified by "[PREFIX][NAME]=[FMT]". */ dstp = valbuf; buflen = sizeof (valbuf); n = strlcpy(dstp, keybuf, buflen); if (n >= sizeof (valbuf)) { errno = EMSGSIZE; zed_log_msg(LOG_WARNING, "Failed to add %s for eid=%llu: %s", keybuf, eid, "Exceeded buffer size"); return (-1); } dstp += n; buflen -= n; *dstp++ = '='; buflen--; if (buflen <= 0) { errno = EMSGSIZE; zed_log_msg(LOG_WARNING, "Failed to add %s for eid=%llu: %s", keybuf, eid, "Exceeded buffer size"); return (-1); } va_start(vargs, fmt); n = vsnprintf(dstp, buflen, fmt, vargs); va_end(vargs); if ((n < 0) || (n >= buflen)) { errno = EMSGSIZE; zed_log_msg(LOG_WARNING, "Failed to add %s for eid=%llu: %s", keybuf, eid, "Exceeded buffer size"); return (-1); } else if (zed_strings_add(zsp, keybuf, valbuf) < 0) { zed_log_msg(LOG_WARNING, "Failed to add %s for eid=%llu: %s", keybuf, eid, strerror(errno)); return (-1); } return (0); } static int _zed_event_add_array_err(uint64_t eid, const char *name) { errno = EMSGSIZE; zed_log_msg(LOG_WARNING, "Failed to convert nvpair \"%s\" for eid=%llu: " "Exceeded buffer size", name, eid); return (-1); } static int _zed_event_add_int8_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; int8_t *i8p; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_INT8_ARRAY)); name = nvpair_name(nvp); (void) nvpair_value_int8_array(nvp, &i8p, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, "%d ", i8p[i]); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } static int _zed_event_add_uint8_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; uint8_t *u8p; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_UINT8_ARRAY)); name = nvpair_name(nvp); (void) nvpair_value_uint8_array(nvp, &u8p, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, "%u ", u8p[i]); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } static int _zed_event_add_int16_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; int16_t *i16p; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_INT16_ARRAY)); name = nvpair_name(nvp); (void) nvpair_value_int16_array(nvp, &i16p, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, "%d ", i16p[i]); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } static int _zed_event_add_uint16_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; uint16_t *u16p; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_UINT16_ARRAY)); name = nvpair_name(nvp); (void) nvpair_value_uint16_array(nvp, &u16p, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, "%u ", u16p[i]); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } static int _zed_event_add_int32_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; int32_t *i32p; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_INT32_ARRAY)); name = nvpair_name(nvp); (void) nvpair_value_int32_array(nvp, &i32p, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, "%d ", i32p[i]); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } static int _zed_event_add_uint32_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; uint32_t *u32p; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY)); name = nvpair_name(nvp); (void) nvpair_value_uint32_array(nvp, &u32p, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, "%u ", u32p[i]); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } static int _zed_event_add_int64_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; int64_t *i64p; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_INT64_ARRAY)); name = nvpair_name(nvp); (void) nvpair_value_int64_array(nvp, &i64p, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, "%lld ", (u_longlong_t)i64p[i]); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } static int _zed_event_add_uint64_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; const char *fmt; uint64_t *u64p; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_UINT64_ARRAY)); name = nvpair_name(nvp); fmt = _zed_event_value_is_hex(name) ? "0x%.16llX " : "%llu "; (void) nvpair_value_uint64_array(nvp, &u64p, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, fmt, (u_longlong_t)u64p[i]); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } static int _zed_event_add_string_array(uint64_t eid, zed_strings_t *zsp, const char *prefix, nvpair_t *nvp) { char buf[MAXBUF]; int buflen = sizeof (buf); const char *name; char **strp; uint_t nelem; uint_t i; char *p; int n; assert((nvp != NULL) && (nvpair_type(nvp) == DATA_TYPE_STRING_ARRAY)); name = nvpair_name(nvp); (void) nvpair_value_string_array(nvp, &strp, &nelem); for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) { n = snprintf(p, buflen, "%s ", strp[i] ? strp[i] : ""); if ((n < 0) || (n >= buflen)) return (_zed_event_add_array_err(eid, name)); p += n; buflen -= n; } if (nelem > 0) *--p = '\0'; return (_zed_event_add_var(eid, zsp, prefix, name, "%s", buf)); } /* * Convert the nvpair [nvp] to a string which is added to the environment * of the child process. * Return 0 on success, -1 on error. */ static void _zed_event_add_nvpair(uint64_t eid, zed_strings_t *zsp, nvpair_t *nvp) { const char *name; data_type_t type; const char *prefix = ZEVENT_VAR_PREFIX; boolean_t b; double d; uint8_t i8; uint16_t i16; uint32_t i32; uint64_t i64; char *str; assert(zsp != NULL); assert(nvp != NULL); name = nvpair_name(nvp); type = nvpair_type(nvp); switch (type) { case DATA_TYPE_BOOLEAN: _zed_event_add_var(eid, zsp, prefix, name, "%s", "1"); break; case DATA_TYPE_BOOLEAN_VALUE: (void) nvpair_value_boolean_value(nvp, &b); _zed_event_add_var(eid, zsp, prefix, name, "%s", b ? "1" : "0"); break; case DATA_TYPE_BYTE: (void) nvpair_value_byte(nvp, &i8); _zed_event_add_var(eid, zsp, prefix, name, "%d", i8); break; case DATA_TYPE_INT8: (void) nvpair_value_int8(nvp, (int8_t *)&i8); _zed_event_add_var(eid, zsp, prefix, name, "%d", i8); break; case DATA_TYPE_UINT8: (void) nvpair_value_uint8(nvp, &i8); _zed_event_add_var(eid, zsp, prefix, name, "%u", i8); break; case DATA_TYPE_INT16: (void) nvpair_value_int16(nvp, (int16_t *)&i16); _zed_event_add_var(eid, zsp, prefix, name, "%d", i16); break; case DATA_TYPE_UINT16: (void) nvpair_value_uint16(nvp, &i16); _zed_event_add_var(eid, zsp, prefix, name, "%u", i16); break; case DATA_TYPE_INT32: (void) nvpair_value_int32(nvp, (int32_t *)&i32); _zed_event_add_var(eid, zsp, prefix, name, "%d", i32); break; case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &i32); _zed_event_add_var(eid, zsp, prefix, name, "%u", i32); break; case DATA_TYPE_INT64: (void) nvpair_value_int64(nvp, (int64_t *)&i64); _zed_event_add_var(eid, zsp, prefix, name, "%lld", (longlong_t)i64); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &i64); _zed_event_add_var(eid, zsp, prefix, name, (_zed_event_value_is_hex(name) ? "0x%.16llX" : "%llu"), (u_longlong_t)i64); /* * shadow readable strings for vdev state pairs */ if (strcmp(name, FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE) == 0 || strcmp(name, FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE) == 0) { char alt[32]; (void) snprintf(alt, sizeof (alt), "%s_str", name); _zed_event_add_var(eid, zsp, prefix, alt, "%s", zpool_state_to_name(i64, VDEV_AUX_NONE)); } else /* * shadow readable strings for pool state */ if (strcmp(name, FM_EREPORT_PAYLOAD_ZFS_POOL_STATE) == 0) { char alt[32]; (void) snprintf(alt, sizeof (alt), "%s_str", name); _zed_event_add_var(eid, zsp, prefix, alt, "%s", zpool_pool_state_to_name(i64)); } break; case DATA_TYPE_DOUBLE: (void) nvpair_value_double(nvp, &d); _zed_event_add_var(eid, zsp, prefix, name, "%g", d); break; case DATA_TYPE_HRTIME: (void) nvpair_value_hrtime(nvp, (hrtime_t *)&i64); _zed_event_add_var(eid, zsp, prefix, name, "%llu", (u_longlong_t)i64); break; case DATA_TYPE_STRING: (void) nvpair_value_string(nvp, &str); _zed_event_add_var(eid, zsp, prefix, name, "%s", (str ? str : "")); break; case DATA_TYPE_INT8_ARRAY: _zed_event_add_int8_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_UINT8_ARRAY: _zed_event_add_uint8_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_INT16_ARRAY: _zed_event_add_int16_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_UINT16_ARRAY: _zed_event_add_uint16_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_INT32_ARRAY: _zed_event_add_int32_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_UINT32_ARRAY: _zed_event_add_uint32_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_INT64_ARRAY: _zed_event_add_int64_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_UINT64_ARRAY: _zed_event_add_uint64_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_STRING_ARRAY: _zed_event_add_string_array(eid, zsp, prefix, nvp); break; case DATA_TYPE_NVLIST: case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_BYTE_ARRAY: case DATA_TYPE_NVLIST_ARRAY: _zed_event_add_var(eid, zsp, prefix, name, "_NOT_IMPLEMENTED_"); break; default: errno = EINVAL; zed_log_msg(LOG_WARNING, "Failed to convert nvpair \"%s\" for eid=%llu: " "Unrecognized type=%u", name, eid, (unsigned int) type); break; } } /* * Restrict various environment variables to safe and sane values * when constructing the environment for the child process, unless * we're running with a custom $PATH (like under the ZFS test suite). * * Reference: Secure Programming Cookbook by Viega & Messier, Section 1.1. */ static void _zed_event_add_env_restrict(uint64_t eid, zed_strings_t *zsp, const char *path) { const char *env_restrict[][2] = { { "IFS", " \t\n" }, { "PATH", _PATH_STDPATH }, { "ZDB", SBINDIR "/zdb" }, { "ZED", SBINDIR "/zed" }, { "ZFS", SBINDIR "/zfs" }, { "ZINJECT", SBINDIR "/zinject" }, { "ZPOOL", SBINDIR "/zpool" }, { "ZFS_ALIAS", ZFS_META_ALIAS }, { "ZFS_VERSION", ZFS_META_VERSION }, { "ZFS_RELEASE", ZFS_META_RELEASE }, { NULL, NULL } }; /* * If we have a custom $PATH, use the default ZFS binary locations * instead of the hard-coded ones. */ const char *env_path[][2] = { { "IFS", " \t\n" }, { "PATH", NULL }, /* $PATH copied in later on */ { "ZDB", "zdb" }, { "ZED", "zed" }, { "ZFS", "zfs" }, { "ZINJECT", "zinject" }, { "ZPOOL", "zpool" }, { "ZFS_ALIAS", ZFS_META_ALIAS }, { "ZFS_VERSION", ZFS_META_VERSION }, { "ZFS_RELEASE", ZFS_META_RELEASE }, { NULL, NULL } }; const char *(*pa)[2]; assert(zsp != NULL); pa = path != NULL ? env_path : env_restrict; for (; *(*pa); pa++) { /* Use our custom $PATH if we have one */ if (path != NULL && strcmp((*pa)[0], "PATH") == 0) (*pa)[1] = path; _zed_event_add_var(eid, zsp, NULL, (*pa)[0], "%s", (*pa)[1]); } } /* * Preserve specified variables from the parent environment * when constructing the environment for the child process. * * Reference: Secure Programming Cookbook by Viega & Messier, Section 1.1. */ static void _zed_event_add_env_preserve(uint64_t eid, zed_strings_t *zsp) { const char *env_preserve[] = { "TZ", NULL }; const char **keyp; const char *val; assert(zsp != NULL); for (keyp = env_preserve; *keyp; keyp++) { if ((val = getenv(*keyp))) _zed_event_add_var(eid, zsp, NULL, *keyp, "%s", val); } } /* * Compute the "subclass" by removing the first 3 components of [class] * (which will always be of the form "*.fs.zfs"). Return a pointer inside * the string [class], or NULL if insufficient components exist. */ static const char * _zed_event_get_subclass(const char *class) { const char *p; int i; if (!class) return (NULL); p = class; for (i = 0; i < 3; i++) { p = strchr(p, '.'); if (!p) break; p++; } return (p); } /* * Convert the zevent time from a 2-element array of 64b integers * into a more convenient form: * - TIME_SECS is the second component of the time. * - TIME_NSECS is the nanosecond component of the time. * - TIME_STRING is an almost-RFC3339-compliant string representation. */ static void _zed_event_add_time_strings(uint64_t eid, zed_strings_t *zsp, int64_t etime[]) { struct tm stp; char buf[32]; assert(zsp != NULL); assert(etime != NULL); _zed_event_add_var(eid, zsp, ZEVENT_VAR_PREFIX, "TIME_SECS", "%" PRId64, etime[0]); _zed_event_add_var(eid, zsp, ZEVENT_VAR_PREFIX, "TIME_NSECS", "%" PRId64, etime[1]); if (!localtime_r((const time_t *) &etime[0], &stp)) { zed_log_msg(LOG_WARNING, "Failed to add %s%s for eid=%llu: %s", ZEVENT_VAR_PREFIX, "TIME_STRING", eid, "localtime error"); } else if (!strftime(buf, sizeof (buf), "%Y-%m-%d %H:%M:%S%z", &stp)) { zed_log_msg(LOG_WARNING, "Failed to add %s%s for eid=%llu: %s", ZEVENT_VAR_PREFIX, "TIME_STRING", eid, "strftime error"); } else { _zed_event_add_var(eid, zsp, ZEVENT_VAR_PREFIX, "TIME_STRING", "%s", buf); } } /* * Service the next zevent, blocking until one is available. */ int zed_event_service(struct zed_conf *zcp) { nvlist_t *nvl; nvpair_t *nvp; int n_dropped; zed_strings_t *zsp; uint64_t eid; int64_t *etime; uint_t nelem; char *class; const char *subclass; int rv; if (!zcp) { errno = EINVAL; zed_log_msg(LOG_ERR, "Failed to service zevent: %s", strerror(errno)); return (EINVAL); } rv = zpool_events_next(zcp->zfs_hdl, &nvl, &n_dropped, ZEVENT_NONE, zcp->zevent_fd); if ((rv != 0) || !nvl) return (errno); if (n_dropped > 0) { zed_log_msg(LOG_WARNING, "Missed %d events", n_dropped); _bump_event_queue_length(); } if (nvlist_lookup_uint64(nvl, "eid", &eid) != 0) { zed_log_msg(LOG_WARNING, "Failed to lookup zevent eid"); } else if (nvlist_lookup_int64_array( nvl, "time", &etime, &nelem) != 0) { zed_log_msg(LOG_WARNING, "Failed to lookup zevent time (eid=%llu)", eid); } else if (nelem != 2) { zed_log_msg(LOG_WARNING, "Failed to lookup zevent time (eid=%llu, nelem=%u)", eid, nelem); } else if (nvlist_lookup_string(nvl, "class", &class) != 0) { zed_log_msg(LOG_WARNING, "Failed to lookup zevent class (eid=%llu)", eid); } else { /* let internal modules see this event first */ zfs_agent_post_event(class, NULL, nvl); zsp = zed_strings_create(); nvp = NULL; while ((nvp = nvlist_next_nvpair(nvl, nvp))) _zed_event_add_nvpair(eid, zsp, nvp); _zed_event_add_env_restrict(eid, zsp, zcp->path); _zed_event_add_env_preserve(eid, zsp); _zed_event_add_var(eid, zsp, ZED_VAR_PREFIX, "PID", "%d", (int)getpid()); _zed_event_add_var(eid, zsp, ZED_VAR_PREFIX, "ZEDLET_DIR", "%s", zcp->zedlet_dir); subclass = _zed_event_get_subclass(class); _zed_event_add_var(eid, zsp, ZEVENT_VAR_PREFIX, "SUBCLASS", "%s", (subclass ? subclass : class)); _zed_event_add_time_strings(eid, zsp, etime); zed_exec_process(eid, class, subclass, zcp, zsp); zed_conf_write_state(zcp, eid, etime); zed_strings_destroy(zsp); } nvlist_free(nvl); return (0); } diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in index befd163b6536..9e73d1a78724 100644 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in @@ -1,12 +1,12 @@ [Unit] Description=Snapshot bootfs just before it is mounted Requisite=zfs-import.target After=zfs-import.target dracut-pre-mount.service Before=dracut-mount.service DefaultDependencies=no ConditionKernelCommandLine=bootfs.snapshot [Service] Type=oneshot -ExecStart=/bin/sh -c '. /lib/dracut-zfs-lib.sh; decode_root_args || exit; [ "$root" = "zfs:AUTO" ] && root="$BOOTFS"; SNAPNAME="$(getarg bootfs.snapshot)"; exec @sbindir@/zfs snapshot "$root@${SNAPNAME:-%v}"' +ExecStart=-/bin/sh -c '. /lib/dracut-zfs-lib.sh; decode_root_args || exit; [ "$root" = "zfs:AUTO" ] && root="$BOOTFS"; SNAPNAME="$(getarg bootfs.snapshot)"; exec @sbindir@/zfs snapshot "$root@${SNAPNAME:-%v}"' RemainAfterExit=yes diff --git a/sys/contrib/openzfs/include/sys/dsl_crypt.h b/sys/contrib/openzfs/include/sys/dsl_crypt.h index db594eece1c3..72716e296c9e 100644 --- a/sys/contrib/openzfs/include/sys/dsl_crypt.h +++ b/sys/contrib/openzfs/include/sys/dsl_crypt.h @@ -1,226 +1,227 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. * * CDDL HEADER END */ /* * Copyright (c) 2017, Datto, Inc. All rights reserved. */ #ifndef _SYS_DSL_CRYPT_H #define _SYS_DSL_CRYPT_H #include #include #include #include #include /* * ZAP entry keys for DSL Crypto Keys stored on disk. In addition, * ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are * also maintained here using their respective property names. */ #define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE" #define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID" #define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV" #define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC" #define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1" #define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1" #define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ" #define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT" #define DSL_CRYPTO_KEY_VERSION "DSL_CRYPTO_VERSION" /* * In-memory representation of a wrapping key. One of these structs will exist * for each encryption root with its key loaded. */ typedef struct dsl_wrapping_key { /* link on spa_keystore_t:sk_wkeys */ avl_node_t wk_avl_link; /* keyformat property enum */ zfs_keyformat_t wk_keyformat; /* the pbkdf2 salt, if the keyformat is of type passphrase */ uint64_t wk_salt; /* the pbkdf2 iterations, if the keyformat is of type passphrase */ uint64_t wk_iters; /* actual wrapping key */ crypto_key_t wk_key; /* refcount of number of dsl_crypto_key_t's holding this struct */ zfs_refcount_t wk_refcnt; /* dsl directory object that owns this wrapping key */ uint64_t wk_ddobj; } dsl_wrapping_key_t; /* enum of commands indicating special actions that should be run */ typedef enum dcp_cmd { /* key creation commands */ DCP_CMD_NONE = 0, /* no specific command */ DCP_CMD_RAW_RECV, /* raw receive */ /* key changing commands */ DCP_CMD_NEW_KEY, /* rewrap key as an encryption root */ DCP_CMD_INHERIT, /* rewrap key with parent's wrapping key */ DCP_CMD_FORCE_NEW_KEY, /* change to encryption root without rewrap */ DCP_CMD_FORCE_INHERIT, /* inherit parent's key without rewrap */ DCP_CMD_MAX } dcp_cmd_t; /* * This struct is a simple wrapper around all the parameters that are usually * required to setup encryption. It exists so that all of the params can be * passed around the kernel together for convenience. */ typedef struct dsl_crypto_params { /* command indicating intended action */ dcp_cmd_t cp_cmd; /* the encryption algorithm */ enum zio_encrypt cp_crypt; /* keylocation property string */ char *cp_keylocation; /* the wrapping key */ dsl_wrapping_key_t *cp_wkey; } dsl_crypto_params_t; /* * In-memory representation of a DSL Crypto Key object. One of these structs * (and corresponding on-disk ZAP object) will exist for each encrypted * clone family that is mounted or otherwise reading protected data. */ typedef struct dsl_crypto_key { /* link on spa_keystore_t:sk_dsl_keys */ avl_node_t dck_avl_link; /* refcount of holders of this key */ zfs_refcount_t dck_holds; /* master key used to derive encryption keys */ zio_crypt_key_t dck_key; /* wrapping key for syncing this structure to disk */ dsl_wrapping_key_t *dck_wkey; /* on-disk object id */ uint64_t dck_obj; } dsl_crypto_key_t; /* * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used * to look up the corresponding dsl_crypto_key_t from the zio layer for * performing data encryption and decryption. */ typedef struct dsl_key_mapping { /* link on spa_keystore_t:sk_key_mappings */ avl_node_t km_avl_link; /* refcount of how many users are depending on this mapping */ zfs_refcount_t km_refcnt; /* dataset this crypto key belongs to (index) */ uint64_t km_dsobj; /* crypto key (value) of this record */ dsl_crypto_key_t *km_key; } dsl_key_mapping_t; /* in memory structure for holding all wrapping and dsl keys */ typedef struct spa_keystore { /* lock for protecting sk_dsl_keys */ krwlock_t sk_dk_lock; /* tree of all dsl_crypto_key_t's */ avl_tree_t sk_dsl_keys; /* lock for protecting sk_key_mappings */ krwlock_t sk_km_lock; /* tree of all dsl_key_mapping_t's, indexed by dsobj */ avl_tree_t sk_key_mappings; /* lock for protecting the wrapping keys tree */ krwlock_t sk_wkeys_lock; /* tree of all dsl_wrapping_key_t's, indexed by ddobj */ avl_tree_t sk_wkeys; } spa_keystore_t; int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out); void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload); void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv); int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation); boolean_t dsl_dir_incompatible_encryption_version(dsl_dir_t *dd); void spa_keystore_init(spa_keystore_t *sk); void spa_keystore_fini(spa_keystore_t *sk); void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, const void *tag); int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey); int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, boolean_t noop); int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj); int spa_keystore_unload_wkey(const char *dsname); int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, const void *tag, dsl_key_mapping_t **km_out); int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, const void *tag); void key_mapping_add_ref(dsl_key_mapping_t *km, const void *tag); void key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, const void *tag); int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, const void *tag, dsl_crypto_key_t **dck_out); int dsl_crypto_populate_key_nvlist(struct objset *os, uint64_t from_ivset_guid, nvlist_t **nvl_out); int dsl_crypto_recv_raw_key_check(struct dsl_dataset *ds, nvlist_t *nvl, dmu_tx_t *tx); void dsl_crypto_recv_raw_key_sync(struct dsl_dataset *ds, nvlist_t *nvl, dmu_tx_t *tx); int dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj, dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key); int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp); int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent); int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin); void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, dmu_tx_t *tx); int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, boolean_t *will_encrypt); void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx); uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx); void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx); int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt); int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, uint_t datalen, uint8_t *mac); int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, uint_t datalen, boolean_t byteswap); int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb, dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt); +zfs_keystatus_t dsl_dataset_get_keystatus(dsl_dir_t *dd); #endif diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c index b6e64274cd04..047a254882f7 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c @@ -1,5589 +1,5589 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. * Copyright 2016 Igor Kozhukhov * Copyright 2017-2018 RackTop Systems. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, loli10K * Copyright (c) 2021 Matt Fiddaman */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_IDMAP #include #include #include #endif /* HAVE_IDMAP */ #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "libzfs_impl.h" #include "zfs_deleg.h" static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp); /* * Given a single type (not a mask of types), return the type in a human * readable form. */ const char * zfs_type_to_name(zfs_type_t type) { switch (type) { case ZFS_TYPE_FILESYSTEM: return (dgettext(TEXT_DOMAIN, "filesystem")); case ZFS_TYPE_SNAPSHOT: return (dgettext(TEXT_DOMAIN, "snapshot")); case ZFS_TYPE_VOLUME: return (dgettext(TEXT_DOMAIN, "volume")); case ZFS_TYPE_POOL: return (dgettext(TEXT_DOMAIN, "pool")); case ZFS_TYPE_BOOKMARK: return (dgettext(TEXT_DOMAIN, "bookmark")); default: assert(!"unhandled zfs_type_t"); } return (NULL); } /* * Validate a ZFS path. This is used even before trying to open the dataset, to * provide a more meaningful error message. We call zfs_error_aux() to * explain exactly why the name was not valid. */ int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, boolean_t modifying) { namecheck_err_t why; char what; if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshot delimiter '@' is not expected here")); return (0); } if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing '@' delimiter in snapshot name")); return (0); } if (!(type & ZFS_TYPE_BOOKMARK) && strchr(path, '#') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "bookmark delimiter '#' is not expected here")); return (0); } if (type == ZFS_TYPE_BOOKMARK && strchr(path, '#') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing '#' delimiter in bookmark name")); return (0); } if (modifying && strchr(path, '%') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character %c in name"), '%'); return (0); } if (entity_namecheck(path, &why, &what) != 0) { if (hdl != NULL) { switch (why) { case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is too long")); break; case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "leading slash in name")); break; case NAME_ERR_EMPTY_COMPONENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "empty component or misplaced '@'" " or '#' delimiter in name")); break; case NAME_ERR_TRAILING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "trailing slash in name")); break; case NAME_ERR_INVALCHAR: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character " "'%c' in name"), what); break; case NAME_ERR_MULTIPLE_DELIMITERS: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "multiple '@' and/or '#' delimiters in " "name")); break; case NAME_ERR_NOLETTER: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool doesn't begin with a letter")); break; case NAME_ERR_RESERVED: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); break; case NAME_ERR_DISKLIKE: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "reserved disk name")); break; case NAME_ERR_SELF_REF: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "self reference, '.' is found in name")); break; case NAME_ERR_PARENT_REF: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent reference, '..' is found in name")); break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "(%d) not defined"), why); break; } } return (0); } return (-1); } int zfs_name_valid(const char *name, zfs_type_t type) { if (type == ZFS_TYPE_POOL) return (zpool_name_valid(NULL, B_FALSE, name)); return (zfs_validate_name(NULL, name, type, B_FALSE)); } /* * This function takes the raw DSL properties, and filters out the user-defined * properties into a separate nvlist. */ static nvlist_t * process_user_props(zfs_handle_t *zhp, nvlist_t *props) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvpair_t *elem; nvlist_t *nvl; if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { if (!zfs_prop_user(nvpair_name(elem))) continue; nvlist_t *propval = fnvpair_value_nvlist(elem); if (nvlist_add_nvlist(nvl, nvpair_name(elem), propval) != 0) { nvlist_free(nvl); (void) no_memory(hdl); return (NULL); } } return (nvl); } static zpool_handle_t * zpool_add_handle(zfs_handle_t *zhp, const char *pool_name) { libzfs_handle_t *hdl = zhp->zfs_hdl; zpool_handle_t *zph; if ((zph = zpool_open_canfail(hdl, pool_name)) != NULL) { if (hdl->libzfs_pool_handles != NULL) zph->zpool_next = hdl->libzfs_pool_handles; hdl->libzfs_pool_handles = zph; } return (zph); } static zpool_handle_t * zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len) { libzfs_handle_t *hdl = zhp->zfs_hdl; zpool_handle_t *zph = hdl->libzfs_pool_handles; while ((zph != NULL) && (strncmp(pool_name, zpool_get_name(zph), len) != 0)) zph = zph->zpool_next; return (zph); } /* * Returns a handle to the pool that contains the provided dataset. * If a handle to that pool already exists then that handle is returned. * Otherwise, a new handle is created and added to the list of handles. */ static zpool_handle_t * zpool_handle(zfs_handle_t *zhp) { char *pool_name; int len; zpool_handle_t *zph; len = strcspn(zhp->zfs_name, "/@#") + 1; pool_name = zfs_alloc(zhp->zfs_hdl, len); (void) strlcpy(pool_name, zhp->zfs_name, len); zph = zpool_find_handle(zhp, pool_name, len); if (zph == NULL) zph = zpool_add_handle(zhp, pool_name); free(pool_name); return (zph); } void zpool_free_handles(libzfs_handle_t *hdl) { zpool_handle_t *next, *zph = hdl->libzfs_pool_handles; while (zph != NULL) { next = zph->zpool_next; zpool_close(zph); zph = next; } hdl->libzfs_pool_handles = NULL; } /* * Utility function to gather stats (objset and zpl) for the given object. */ static int get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc) { libzfs_handle_t *hdl = zhp->zfs_hdl; (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); while (zfs_ioctl(hdl, ZFS_IOC_OBJSET_STATS, zc) != 0) { if (errno == ENOMEM) zcmd_expand_dst_nvlist(hdl, zc); else return (-1); } return (0); } /* * Utility function to get the received properties of the given object. */ static int get_recvd_props_ioctl(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *recvdprops; zfs_cmd_t zc = {"\0"}; int err; zcmd_alloc_dst_nvlist(hdl, &zc, 0); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); while (zfs_ioctl(hdl, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) { if (errno == ENOMEM) zcmd_expand_dst_nvlist(hdl, &zc); else { zcmd_free_nvlists(&zc); return (-1); } } err = zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &recvdprops); zcmd_free_nvlists(&zc); if (err != 0) return (-1); nvlist_free(zhp->zfs_recvd_props); zhp->zfs_recvd_props = recvdprops; return (0); } static int put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc) { nvlist_t *allprops, *userprops; zhp->zfs_dmustats = zc->zc_objset_stats; /* structure assignment */ if (zcmd_read_dst_nvlist(zhp->zfs_hdl, zc, &allprops) != 0) { return (-1); } /* * XXX Why do we store the user props separately, in addition to * storing them in zfs_props? */ if ((userprops = process_user_props(zhp, allprops)) == NULL) { nvlist_free(allprops); return (-1); } nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); zhp->zfs_props = allprops; zhp->zfs_user_props = userprops; return (0); } static int get_stats(zfs_handle_t *zhp) { int rc = 0; zfs_cmd_t zc = {"\0"}; zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0); if (get_stats_ioctl(zhp, &zc) != 0) rc = -1; else if (put_stats_zhdl(zhp, &zc) != 0) rc = -1; zcmd_free_nvlists(&zc); return (rc); } /* * Refresh the properties currently stored in the handle. */ void zfs_refresh_properties(zfs_handle_t *zhp) { (void) get_stats(zhp); } /* * Makes a handle from the given dataset name. Used by zfs_open() and * zfs_iter_* to create child handles on the fly. */ static int make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) { if (put_stats_zhdl(zhp, zc) != 0) return (-1); /* * We've managed to open the dataset and gather statistics. Determine * the high-level type. */ if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) { zhp->zfs_head_type = ZFS_TYPE_VOLUME; } else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) { zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM; } else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER) { errno = EINVAL; return (-1); } else if (zhp->zfs_dmustats.dds_inconsistent) { errno = EBUSY; return (-1); } else { abort(); } if (zhp->zfs_dmustats.dds_is_snapshot) zhp->zfs_type = ZFS_TYPE_SNAPSHOT; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) zhp->zfs_type = ZFS_TYPE_VOLUME; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_type = ZFS_TYPE_FILESYSTEM; else abort(); /* we should never see any other types */ if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) return (-1); return (0); } zfs_handle_t * make_dataset_handle(libzfs_handle_t *hdl, const char *path) { zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); zhp->zfs_hdl = hdl; (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); zcmd_alloc_dst_nvlist(hdl, &zc, 0); if (get_stats_ioctl(zhp, &zc) == -1) { zcmd_free_nvlists(&zc); free(zhp); return (NULL); } if (make_dataset_handle_common(zhp, &zc) == -1) { free(zhp); zhp = NULL; } zcmd_free_nvlists(&zc); return (zhp); } zfs_handle_t * make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); zhp->zfs_hdl = hdl; (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); if (make_dataset_handle_common(zhp, zc) == -1) { free(zhp); return (NULL); } return (zhp); } zfs_handle_t * make_dataset_simple_handle_zc(zfs_handle_t *pzhp, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); zhp->zfs_hdl = pzhp->zfs_hdl; (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); zhp->zfs_head_type = pzhp->zfs_type; zhp->zfs_type = ZFS_TYPE_SNAPSHOT; zhp->zpool_hdl = zpool_handle(zhp); zhp->zfs_dmustats = zc->zc_objset_stats; return (zhp); } zfs_handle_t * zfs_handle_dup(zfs_handle_t *zhp_orig) { zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); zhp->zfs_hdl = zhp_orig->zfs_hdl; zhp->zpool_hdl = zhp_orig->zpool_hdl; (void) strlcpy(zhp->zfs_name, zhp_orig->zfs_name, sizeof (zhp->zfs_name)); zhp->zfs_type = zhp_orig->zfs_type; zhp->zfs_head_type = zhp_orig->zfs_head_type; zhp->zfs_dmustats = zhp_orig->zfs_dmustats; if (zhp_orig->zfs_props != NULL) { if (nvlist_dup(zhp_orig->zfs_props, &zhp->zfs_props, 0) != 0) { (void) no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } if (zhp_orig->zfs_user_props != NULL) { if (nvlist_dup(zhp_orig->zfs_user_props, &zhp->zfs_user_props, 0) != 0) { (void) no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } if (zhp_orig->zfs_recvd_props != NULL) { if (nvlist_dup(zhp_orig->zfs_recvd_props, &zhp->zfs_recvd_props, 0)) { (void) no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } zhp->zfs_mntcheck = zhp_orig->zfs_mntcheck; if (zhp_orig->zfs_mntopts != NULL) { zhp->zfs_mntopts = zfs_strdup(zhp_orig->zfs_hdl, zhp_orig->zfs_mntopts); } zhp->zfs_props_table = zhp_orig->zfs_props_table; return (zhp); } boolean_t zfs_bookmark_exists(const char *path) { nvlist_t *bmarks; nvlist_t *props; char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *bmark_name; char *pound; int err; boolean_t rv; (void) strlcpy(fsname, path, sizeof (fsname)); pound = strchr(fsname, '#'); if (pound == NULL) return (B_FALSE); *pound = '\0'; bmark_name = pound + 1; props = fnvlist_alloc(); err = lzc_get_bookmarks(fsname, props, &bmarks); nvlist_free(props); if (err != 0) { nvlist_free(bmarks); return (B_FALSE); } rv = nvlist_exists(bmarks, bmark_name); nvlist_free(bmarks); return (rv); } zfs_handle_t * make_bookmark_handle(zfs_handle_t *parent, const char *path, nvlist_t *bmark_props) { zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); /* Fill in the name. */ zhp->zfs_hdl = parent->zfs_hdl; (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); /* Set the property lists. */ if (nvlist_dup(bmark_props, &zhp->zfs_props, 0) != 0) { free(zhp); return (NULL); } /* Set the types. */ zhp->zfs_head_type = parent->zfs_head_type; zhp->zfs_type = ZFS_TYPE_BOOKMARK; if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) { nvlist_free(zhp->zfs_props); free(zhp); return (NULL); } return (zhp); } struct zfs_open_bookmarks_cb_data { const char *path; zfs_handle_t *zhp; }; static int zfs_open_bookmarks_cb(zfs_handle_t *zhp, void *data) { struct zfs_open_bookmarks_cb_data *dp = data; /* * Is it the one we are looking for? */ if (strcmp(dp->path, zfs_get_name(zhp)) == 0) { /* * We found it. Save it and let the caller know we are done. */ dp->zhp = zhp; return (EEXIST); } /* * Not found. Close the handle and ask for another one. */ zfs_close(zhp); return (0); } /* * Opens the given snapshot, bookmark, filesystem, or volume. The 'types' * argument is a mask of acceptable types. The function will print an * appropriate error message and return NULL if it can't be opened. */ zfs_handle_t * zfs_open(libzfs_handle_t *hdl, const char *path, int types) { zfs_handle_t *zhp; char errbuf[ERRBUFLEN]; char *bookp; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); /* * Validate the name before we even try to open it. */ if (!zfs_validate_name(hdl, path, types, B_FALSE)) { (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); return (NULL); } /* * Bookmarks needs to be handled separately. */ bookp = strchr(path, '#'); if (bookp == NULL) { /* * Try to get stats for the dataset, which will tell us if it * exists. */ errno = 0; if ((zhp = make_dataset_handle(hdl, path)) == NULL) { (void) zfs_standard_error(hdl, errno, errbuf); return (NULL); } } else { char dsname[ZFS_MAX_DATASET_NAME_LEN]; zfs_handle_t *pzhp; struct zfs_open_bookmarks_cb_data cb_data = {path, NULL}; /* * We need to cut out '#' and everything after '#' * to get the parent dataset name only. */ assert(bookp - path < sizeof (dsname)); (void) strncpy(dsname, path, bookp - path); dsname[bookp - path] = '\0'; /* * Create handle for the parent dataset. */ errno = 0; if ((pzhp = make_dataset_handle(hdl, dsname)) == NULL) { (void) zfs_standard_error(hdl, errno, errbuf); return (NULL); } /* * Iterate bookmarks to find the right one. */ errno = 0; if ((zfs_iter_bookmarks(pzhp, zfs_open_bookmarks_cb, &cb_data) == 0) && (cb_data.zhp == NULL)) { (void) zfs_error(hdl, EZFS_NOENT, errbuf); zfs_close(pzhp); return (NULL); } if (cb_data.zhp == NULL) { (void) zfs_standard_error(hdl, errno, errbuf); zfs_close(pzhp); return (NULL); } zhp = cb_data.zhp; /* * Cleanup. */ zfs_close(pzhp); } if (!(types & zhp->zfs_type)) { (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (NULL); } return (zhp); } /* * Release a ZFS handle. Nothing to do but free the associated memory. */ void zfs_close(zfs_handle_t *zhp) { if (zhp->zfs_mntopts) free(zhp->zfs_mntopts); nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); nvlist_free(zhp->zfs_recvd_props); free(zhp); } typedef struct mnttab_node { struct mnttab mtn_mt; avl_node_t mtn_node; } mnttab_node_t; static int libzfs_mnttab_cache_compare(const void *arg1, const void *arg2) { const mnttab_node_t *mtn1 = (const mnttab_node_t *)arg1; const mnttab_node_t *mtn2 = (const mnttab_node_t *)arg2; int rv; rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special); return (TREE_ISIGN(rv)); } void libzfs_mnttab_init(libzfs_handle_t *hdl) { pthread_mutex_init(&hdl->libzfs_mnttab_cache_lock, NULL); assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); } static int libzfs_mnttab_update(libzfs_handle_t *hdl) { FILE *mnttab; struct mnttab entry; if ((mnttab = fopen(MNTTAB, "re")) == NULL) return (ENOENT); while (getmntent(mnttab, &entry) == 0) { mnttab_node_t *mtn; avl_index_t where; if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, entry.mnt_special); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, entry.mnt_mountp); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, entry.mnt_fstype); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); /* Exclude duplicate mounts */ if (avl_find(&hdl->libzfs_mnttab_cache, mtn, &where) != NULL) { free(mtn->mtn_mt.mnt_special); free(mtn->mtn_mt.mnt_mountp); free(mtn->mtn_mt.mnt_fstype); free(mtn->mtn_mt.mnt_mntopts); free(mtn); continue; } avl_add(&hdl->libzfs_mnttab_cache, mtn); } (void) fclose(mnttab); return (0); } void libzfs_mnttab_fini(libzfs_handle_t *hdl) { void *cookie = NULL; mnttab_node_t *mtn; while ((mtn = avl_destroy_nodes(&hdl->libzfs_mnttab_cache, &cookie)) != NULL) { free(mtn->mtn_mt.mnt_special); free(mtn->mtn_mt.mnt_mountp); free(mtn->mtn_mt.mnt_fstype); free(mtn->mtn_mt.mnt_mntopts); free(mtn); } avl_destroy(&hdl->libzfs_mnttab_cache); (void) pthread_mutex_destroy(&hdl->libzfs_mnttab_cache_lock); } void libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable) { hdl->libzfs_mnttab_enable = enable; } int libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, struct mnttab *entry) { FILE *mnttab; mnttab_node_t find; mnttab_node_t *mtn; int ret = ENOENT; if (!hdl->libzfs_mnttab_enable) { struct mnttab srch = { 0 }; if (avl_numnodes(&hdl->libzfs_mnttab_cache)) libzfs_mnttab_fini(hdl); if ((mnttab = fopen(MNTTAB, "re")) == NULL) return (ENOENT); srch.mnt_special = (char *)fsname; srch.mnt_fstype = (char *)MNTTYPE_ZFS; ret = getmntany(mnttab, entry, &srch) ? ENOENT : 0; (void) fclose(mnttab); return (ret); } pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) { int error; if ((error = libzfs_mnttab_update(hdl)) != 0) { pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); return (error); } } find.mtn_mt.mnt_special = (char *)fsname; mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL); if (mtn) { *entry = mtn->mtn_mt; ret = 0; } pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); return (ret); } void libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, const char *mountp, const char *mntopts) { mnttab_node_t *mtn; pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); if (avl_numnodes(&hdl->libzfs_mnttab_cache) != 0) { mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); /* * Another thread may have already added this entry * via libzfs_mnttab_update. If so we should skip it. */ if (avl_find(&hdl->libzfs_mnttab_cache, mtn, NULL) != NULL) { free(mtn->mtn_mt.mnt_special); free(mtn->mtn_mt.mnt_mountp); free(mtn->mtn_mt.mnt_fstype); free(mtn->mtn_mt.mnt_mntopts); free(mtn); } else { avl_add(&hdl->libzfs_mnttab_cache, mtn); } } pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); } void libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) { mnttab_node_t find; mnttab_node_t *ret; pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); find.mtn_mt.mnt_special = (char *)fsname; if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL)) != NULL) { avl_remove(&hdl->libzfs_mnttab_cache, ret); free(ret->mtn_mt.mnt_special); free(ret->mtn_mt.mnt_mountp); free(ret->mtn_mt.mnt_fstype); free(ret->mtn_mt.mnt_mntopts); free(ret); } pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); } int zfs_spa_version(zfs_handle_t *zhp, int *spa_version) { zpool_handle_t *zpool_handle = zhp->zpool_hdl; if (zpool_handle == NULL) return (-1); *spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); return (0); } /* * The choice of reservation property depends on the SPA version. */ static int zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) { int spa_version; if (zfs_spa_version(zhp, &spa_version) < 0) return (-1); if (spa_version >= SPA_VERSION_REFRESERVATION) *resv_prop = ZFS_PROP_REFRESERVATION; else *resv_prop = ZFS_PROP_RESERVATION; return (0); } /* * Given an nvlist of properties to set, validates that they are correct, and * parses any numeric properties (index, boolean, etc) if they are specified as * strings. */ nvlist_t * zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl, boolean_t key_params_ok, const char *errbuf) { nvpair_t *elem; uint64_t intval; char *strval; zfs_prop_t prop; nvlist_t *ret; int chosen_normal = -1; int chosen_utf = -1; if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } /* * Make sure this property is valid and applies to this type. */ elem = NULL; while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { const char *propname = nvpair_name(elem); prop = zfs_name_to_prop(propname); if (prop == ZPROP_USERPROP && zfs_prop_user(propname)) { /* * This is a user property: make sure it's a * string, and that it's less than ZAP_MAXNAMELEN. */ if (nvpair_type(elem) != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property name '%s' is too long"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } (void) nvpair_value_string(elem, &strval); if (nvlist_add_string(ret, propname, strval) != 0) { (void) no_memory(hdl); goto error; } continue; } /* * Currently, only user properties can be modified on * snapshots. */ if (type == ZFS_TYPE_SNAPSHOT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "this property can not be modified for snapshots")); (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); goto error; } if (prop == ZPROP_USERPROP && zfs_prop_userquota(propname)) { zfs_userquota_prop_t uqtype; char *newpropname = NULL; char domain[128]; uint64_t rid; uint64_t valary[3]; int rc; if (userquota_propname_decode(propname, zoned, &uqtype, domain, sizeof (domain), &rid) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' has an invalid user/group name"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (uqtype != ZFS_PROP_USERQUOTA && uqtype != ZFS_PROP_GROUPQUOTA && uqtype != ZFS_PROP_USEROBJQUOTA && uqtype != ZFS_PROP_GROUPOBJQUOTA && uqtype != ZFS_PROP_PROJECTQUOTA && uqtype != ZFS_PROP_PROJECTOBJQUOTA) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (nvpair_type(elem) == DATA_TYPE_STRING) { (void) nvpair_value_string(elem, &strval); if (strcmp(strval, "none") == 0) { intval = 0; } else if (zfs_nicestrtonum(hdl, strval, &intval) != 0) { (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { (void) nvpair_value_uint64(elem, &intval); if (intval == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "use 'none' to disable " "{user|group|project}quota")); goto error; } } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a number"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } /* * Encode the prop name as * userquota@-domain, to make it easy * for the kernel to decode. */ rc = asprintf(&newpropname, "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype], (longlong_t)rid, domain); if (rc == -1 || newpropname == NULL) { (void) no_memory(hdl); goto error; } valary[0] = uqtype; valary[1] = rid; valary[2] = intval; if (nvlist_add_uint64_array(ret, newpropname, valary, 3) != 0) { free(newpropname); (void) no_memory(hdl); goto error; } free(newpropname); continue; } else if (prop == ZPROP_USERPROP && zfs_prop_written(propname)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (prop == ZPROP_INVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (!zfs_prop_valid_for_type(prop, type, B_FALSE)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' does not " "apply to datasets of this type"), propname); (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); goto error; } if (zfs_prop_readonly(prop) && !(zfs_prop_setonce(prop) && zhp == NULL) && !(zfs_prop_encryption_key_param(prop) && key_params_ok)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (zprop_parse_value(hdl, elem, prop, type, ret, &strval, &intval, errbuf) != 0) goto error; /* * Perform some additional checks for specific properties. */ switch (prop) { case ZFS_PROP_VERSION: { int version; if (zhp == NULL) break; version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); if (intval < version) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Can not downgrade; already at version %u"), version); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } case ZFS_PROP_VOLBLOCKSIZE: case ZFS_PROP_RECORDSIZE: { int maxbs = SPA_MAXBLOCKSIZE; char buf[64]; if (zpool_hdl != NULL) { maxbs = zpool_get_prop_int(zpool_hdl, ZPOOL_PROP_MAXBLOCKSIZE, NULL); } /* * The value must be a power of two between * SPA_MINBLOCKSIZE and maxbs. */ if (intval < SPA_MINBLOCKSIZE || intval > maxbs || !ISP2(intval)) { zfs_nicebytes(maxbs, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be power of 2 from 512B " "to %s"), propname, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } case ZFS_PROP_SPECIAL_SMALL_BLOCKS: { int maxbs = SPA_OLD_MAXBLOCKSIZE; char buf[64]; if (zpool_hdl != NULL) { char state[64] = ""; maxbs = zpool_get_prop_int(zpool_hdl, ZPOOL_PROP_MAXBLOCKSIZE, NULL); /* * Issue a warning but do not fail so that * tests for settable properties succeed. */ if (zpool_prop_get_feature(zpool_hdl, "feature@allocation_classes", state, sizeof (state)) != 0 || strcmp(state, ZFS_FEATURE_ACTIVE) != 0) { (void) fprintf(stderr, gettext( "%s: property requires a special " "device in the pool\n"), propname); } } if (intval != 0 && (intval < SPA_MINBLOCKSIZE || intval > maxbs || !ISP2(intval))) { zfs_nicebytes(maxbs, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid '%s=%llu' property: must be zero " "or a power of 2 from 512B to %s"), propname, (unsigned long long)intval, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } case ZFS_PROP_MLSLABEL: { #ifdef HAVE_MLSLABEL /* * Verify the mlslabel string and convert to * internal hex label string. */ m_label_t *new_sl; char *hex = NULL; /* internal label string */ /* Default value is already OK. */ if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) break; /* Verify the label can be converted to binary form */ if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) || (str_to_label(strval, &new_sl, MAC_LABEL, L_NO_CORRECTION, NULL) == -1)) { goto badlabel; } /* Now translate to hex internal label string */ if (label_to_str(new_sl, &hex, M_INTERNAL, DEF_NAMES) != 0) { if (hex) free(hex); goto badlabel; } m_label_free(new_sl); /* If string is already in internal form, we're done. */ if (strcmp(strval, hex) == 0) { free(hex); break; } /* Replace the label string with the internal form. */ (void) nvlist_remove(ret, zfs_prop_to_name(prop), DATA_TYPE_STRING); fnvlist_add_string(ret, zfs_prop_to_name(prop), hex); free(hex); break; badlabel: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid mlslabel '%s'"), strval); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); m_label_free(new_sl); /* OK if null */ goto error; #else zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "mlslabels are unsupported")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; #endif /* HAVE_MLSLABEL */ } case ZFS_PROP_MOUNTPOINT: { namecheck_err_t why; if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 || strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0) break; if (mountpoint_namecheck(strval, &why)) { switch (why) { case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be an absolute path, " "'none', or 'legacy'"), propname); break; case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "component of '%s' is too long"), propname); break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "(%d) not defined"), why); break; } (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } zfs_fallthrough; } case ZFS_PROP_SHARESMB: case ZFS_PROP_SHARENFS: /* * For the mountpoint and sharenfs or sharesmb * properties, check if it can be set in a * global/non-global zone based on * the zoned property value: * * global zone non-global zone * -------------------------------------------------- * zoned=on mountpoint (no) mountpoint (yes) * sharenfs (no) sharenfs (no) * sharesmb (no) sharesmb (no) * * zoned=off mountpoint (yes) N/A * sharenfs (yes) * sharesmb (yes) */ if (zoned) { if (getzoneid() == GLOBAL_ZONEID) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set on " "dataset in a non-global zone"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } else if (prop == ZFS_PROP_SHARENFS || prop == ZFS_PROP_SHARESMB) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set in " "a non-global zone"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } } else if (getzoneid() != GLOBAL_ZONEID) { /* * If zoned property is 'off', this must be in * a global zone. If not, something is wrong. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set while dataset " "'zoned' property is set"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } /* * At this point, it is legitimate to set the * property. Now we want to make sure that the * property value is valid if it is sharenfs. */ if ((prop == ZFS_PROP_SHARENFS || prop == ZFS_PROP_SHARESMB) && strcmp(strval, "on") != 0 && strcmp(strval, "off") != 0) { enum sa_protocol proto; if (prop == ZFS_PROP_SHARESMB) proto = SA_PROTOCOL_SMB; else proto = SA_PROTOCOL_NFS; if (sa_validate_shareopts(strval, proto) != SA_OK) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set to invalid " "options"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } break; case ZFS_PROP_KEYLOCATION: if (!zfs_prop_valid_keylocation(strval, B_FALSE)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid keylocation")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (zhp != NULL) { uint64_t crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); if (crypt == ZIO_CRYPT_OFF && strcmp(strval, "none") != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "keylocation must be 'none' " "for unencrypted datasets")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } else if (crypt != ZIO_CRYPT_OFF && strcmp(strval, "none") == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "keylocation must not be 'none' " "for encrypted datasets")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } break; case ZFS_PROP_PBKDF2_ITERS: if (intval < MIN_PBKDF2_ITERATIONS) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "minimum pbkdf2 iterations is %u"), MIN_PBKDF2_ITERATIONS); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; case ZFS_PROP_NORMALIZE: chosen_normal = (int)intval; break; default: break; } /* * For changes to existing volumes, we have some additional * checks to enforce. */ if (type == ZFS_TYPE_VOLUME && zhp != NULL) { uint64_t blocksize = zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE); char buf[64]; switch (prop) { case ZFS_PROP_VOLSIZE: if (intval % blocksize != 0) { zfs_nicebytes(blocksize, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a multiple of " "volume block size (%s)"), propname, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (intval == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be zero"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; default: break; } } /* check encryption properties */ if (zhp != NULL) { int64_t crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); switch (prop) { case ZFS_PROP_COPIES: if (crypt != ZIO_CRYPT_OFF && intval > 2) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "encrypted datasets cannot have " "3 copies")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; default: break; } } } /* * If normalization was chosen, but no UTF8 choice was made, * enforce rejection of non-UTF8 names. * * If normalization was chosen, but rejecting non-UTF8 names * was explicitly not chosen, it is an error. * * If utf8only was turned off, but the parent has normalization, * turn off normalization. */ if (chosen_normal > 0 && chosen_utf < 0) { if (nvlist_add_uint64(ret, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) { (void) no_memory(hdl); goto error; } } else if (chosen_normal > 0 && chosen_utf == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be set 'on' if normalization chosen"), zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } else if (chosen_normal < 0 && chosen_utf == 0) { if (nvlist_add_uint64(ret, zfs_prop_to_name(ZFS_PROP_NORMALIZE), 0) != 0) { (void) no_memory(hdl); goto error; } } return (ret); error: nvlist_free(ret); return (NULL); } static int zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) { uint64_t old_volsize; uint64_t new_volsize; uint64_t old_reservation; uint64_t new_reservation; zfs_prop_t resv_prop; nvlist_t *props; zpool_handle_t *zph = zpool_handle(zhp); /* * If this is an existing volume, and someone is setting the volsize, * make sure that it matches the reservation, or add it if necessary. */ old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_reservation = zfs_prop_get_int(zhp, resv_prop); props = fnvlist_alloc(); fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE)); if ((zvol_volsize_to_reservation(zph, old_volsize, props) != old_reservation) || nvlist_exists(nvl, zfs_prop_to_name(resv_prop))) { fnvlist_free(props); return (0); } if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &new_volsize) != 0) { fnvlist_free(props); return (-1); } new_reservation = zvol_volsize_to_reservation(zph, new_volsize, props); fnvlist_free(props); if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop), new_reservation) != 0) { (void) no_memory(zhp->zfs_hdl); return (-1); } return (1); } /* * Helper for 'zfs {set|clone} refreservation=auto'. Must be called after * zfs_valid_proplist(), as it is what sets the UINT64_MAX sentinel value. * Return codes must match zfs_add_synthetic_resv(). */ static int zfs_fix_auto_resv(zfs_handle_t *zhp, nvlist_t *nvl) { uint64_t volsize; uint64_t resvsize; zfs_prop_t prop; nvlist_t *props; if (!ZFS_IS_VOLUME(zhp)) { return (0); } if (zfs_which_resv_prop(zhp, &prop) != 0) { return (-1); } if (prop != ZFS_PROP_REFRESERVATION) { return (0); } if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(prop), &resvsize) != 0) { /* No value being set, so it can't be "auto" */ return (0); } if (resvsize != UINT64_MAX) { /* Being set to a value other than "auto" */ return (0); } props = fnvlist_alloc(); fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE)); if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) { volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); } resvsize = zvol_volsize_to_reservation(zpool_handle(zhp), volsize, props); fnvlist_free(props); (void) nvlist_remove_all(nvl, zfs_prop_to_name(prop)); if (nvlist_add_uint64(nvl, zfs_prop_to_name(prop), resvsize) != 0) { (void) no_memory(zhp->zfs_hdl); return (-1); } return (1); } static boolean_t zfs_is_namespace_prop(zfs_prop_t prop) { switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_RELATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_SETUID: case ZFS_PROP_READONLY: case ZFS_PROP_XATTR: case ZFS_PROP_NBMAND: return (B_TRUE); default: return (B_FALSE); } } /* * Given a property name and value, set the property for the given dataset. */ int zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) { int ret = -1; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *nvl = NULL; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), zhp->zfs_name); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 || nvlist_add_string(nvl, propname, propval) != 0) { (void) no_memory(hdl); goto error; } ret = zfs_prop_set_list(zhp, nvl); error: nvlist_free(nvl); return (ret); } /* * Given an nvlist of property names and values, set the properties for the * given dataset. */ int zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props) { zfs_cmd_t zc = {"\0"}; int ret = -1; prop_changelist_t **cls = NULL; int cl_idx; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *nvl; int nvl_len = 0; int added_resv = 0; zfs_prop_t prop = 0; nvpair_t *elem; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), zhp->zfs_name); if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl, B_FALSE, errbuf)) == NULL) goto error; /* * We have to check for any extra properties which need to be added * before computing the length of the nvlist. */ for (elem = nvlist_next_nvpair(nvl, NULL); elem != NULL; elem = nvlist_next_nvpair(nvl, elem)) { if (zfs_name_to_prop(nvpair_name(elem)) == ZFS_PROP_VOLSIZE && (added_resv = zfs_add_synthetic_resv(zhp, nvl)) == -1) { goto error; } } if (added_resv != 1 && (added_resv = zfs_fix_auto_resv(zhp, nvl)) == -1) { goto error; } /* * Check how many properties we're setting and allocate an array to * store changelist pointers for postfix(). */ for (elem = nvlist_next_nvpair(nvl, NULL); elem != NULL; elem = nvlist_next_nvpair(nvl, elem)) nvl_len++; if ((cls = calloc(nvl_len, sizeof (prop_changelist_t *))) == NULL) goto error; cl_idx = 0; for (elem = nvlist_next_nvpair(nvl, NULL); elem != NULL; elem = nvlist_next_nvpair(nvl, elem)) { prop = zfs_name_to_prop(nvpair_name(elem)); assert(cl_idx < nvl_len); /* * We don't want to unmount & remount the dataset when changing * its canmount property to 'on' or 'noauto'. We only use * the changelist logic to unmount when setting canmount=off. */ if (prop != ZFS_PROP_CANMOUNT || (fnvpair_value_uint64(elem) == ZFS_CANMOUNT_OFF && zfs_is_mounted(zhp, NULL))) { cls[cl_idx] = changelist_gather(zhp, prop, 0, 0); if (cls[cl_idx] == NULL) goto error; } if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cls[cl_idx])) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } if (cls[cl_idx] != NULL && (ret = changelist_prefix(cls[cl_idx])) != 0) goto error; cl_idx++; } assert(cl_idx == nvl_len); /* * Execute the corresponding ioctl() to set this list of properties. */ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zcmd_write_src_nvlist(hdl, &zc, nvl); zcmd_alloc_dst_nvlist(hdl, &zc, 0); ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); if (ret != 0) { if (zc.zc_nvlist_dst_filled == B_FALSE) { (void) zfs_standard_error(hdl, errno, errbuf); goto error; } /* Get the list of unset properties back and report them. */ nvlist_t *errorprops = NULL; if (zcmd_read_dst_nvlist(hdl, &zc, &errorprops) != 0) goto error; for (nvpair_t *elem = nvlist_next_nvpair(errorprops, NULL); elem != NULL; elem = nvlist_next_nvpair(errorprops, elem)) { prop = zfs_name_to_prop(nvpair_name(elem)); zfs_setprop_error(hdl, prop, errno, errbuf); } nvlist_free(errorprops); if (added_resv && errno == ENOSPC) { /* clean up the volsize property we tried to set */ uint64_t old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); nvlist_free(nvl); nvl = NULL; zcmd_free_nvlists(&zc); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) goto error; if (nvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), old_volsize) != 0) goto error; zcmd_write_src_nvlist(hdl, &zc, nvl); (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); } } else { for (cl_idx = 0; cl_idx < nvl_len; cl_idx++) { if (cls[cl_idx] != NULL) { int clp_err = changelist_postfix(cls[cl_idx]); if (clp_err != 0) ret = clp_err; } } if (ret == 0) { /* * Refresh the statistics so the new property * value is reflected. */ (void) get_stats(zhp); /* * Remount the filesystem to propagate the change * if one of the options handled by the generic * Linux namespace layer has been modified. */ if (zfs_is_namespace_prop(prop) && zfs_is_mounted(zhp, NULL)) ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0); } } error: nvlist_free(nvl); zcmd_free_nvlists(&zc); if (cls != NULL) { for (cl_idx = 0; cl_idx < nvl_len; cl_idx++) { if (cls[cl_idx] != NULL) changelist_free(cls[cl_idx]); } free(cls); } return (ret); } /* * Given a property, inherit the value from the parent dataset, or if received * is TRUE, revert to the received value, if any. */ int zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) { zfs_cmd_t zc = {"\0"}; int ret; prop_changelist_t *cl; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; zfs_prop_t prop; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s'"), propname, zhp->zfs_name); zc.zc_cookie = received; if ((prop = zfs_name_to_prop(propname)) == ZPROP_USERPROP) { /* * For user properties, the amount of work we have to do is very * small, so just do it here. */ if (!zfs_prop_user(propname)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0) return (zfs_standard_error(hdl, errno, errbuf)); (void) get_stats(zhp); return (0); } /* * Verify that this property is inheritable. */ if (zfs_prop_readonly(prop)) return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf)); if (!zfs_prop_inheritable(prop) && !received) return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf)); /* * Check to see if the value applies to this type */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type, B_FALSE)) return (zfs_error(hdl, EZFS_PROPTYPE, errbuf)); /* * Normalize the name, to get rid of shorthand abbreviations. */ propname = zfs_prop_to_name(prop); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is used in a non-global zone")); return (zfs_error(hdl, EZFS_ZONED, errbuf)); } /* * Determine datasets which will be affected by this change, if any. */ if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) return (-1); if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } if ((ret = changelist_prefix(cl)) != 0) goto error; if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) { return (zfs_standard_error(hdl, errno, errbuf)); } else { if ((ret = changelist_postfix(cl)) != 0) goto error; /* * Refresh the statistics so the new property is reflected. */ (void) get_stats(zhp); /* * Remount the filesystem to propagate the change * if one of the options handled by the generic * Linux namespace layer has been modified. */ if (zfs_is_namespace_prop(prop) && zfs_is_mounted(zhp, NULL)) ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0); } error: changelist_free(cl); return (ret); } /* * True DSL properties are stored in an nvlist. The following two functions * extract them appropriately. */ uint64_t getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; uint64_t value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { value = fnvlist_lookup_uint64(nv, ZPROP_VALUE); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); value = zfs_prop_default_numeric(prop); *source = (char *)""; } return (value); } static const char * getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; const char *value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { value = fnvlist_lookup_string(nv, ZPROP_VALUE); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); value = zfs_prop_default_string(prop); *source = (char *)""; } return (value); } static boolean_t zfs_is_recvd_props_mode(zfs_handle_t *zhp) { return (zhp->zfs_props == zhp->zfs_recvd_props); } static void zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) { *cookie = (uint64_t)(uintptr_t)zhp->zfs_props; zhp->zfs_props = zhp->zfs_recvd_props; } static void zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) { zhp->zfs_props = (nvlist_t *)(uintptr_t)*cookie; *cookie = 0; } /* * Internal function for getting a numeric property. Both zfs_prop_get() and * zfs_prop_get_int() are built using this interface. * * Certain properties can be overridden using 'mount -o'. In this case, scan * the contents of the /proc/self/mounts entry, searching for the * appropriate options. If they differ from the on-disk values, report the * current values and mark the source "temporary". */ static int get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, char **source, uint64_t *val) { zfs_cmd_t zc = {"\0"}; nvlist_t *zplprops = NULL; struct mnttab mnt; const char *mntopt_on = NULL; const char *mntopt_off = NULL; boolean_t received = zfs_is_recvd_props_mode(zhp); *source = NULL; /* * If the property is being fetched for a snapshot, check whether * the property is valid for the snapshot's head dataset type. */ if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT && !zfs_prop_valid_for_type(prop, zhp->zfs_head_type, B_TRUE)) { *val = zfs_prop_default_numeric(prop); return (-1); } switch (prop) { case ZFS_PROP_ATIME: mntopt_on = MNTOPT_ATIME; mntopt_off = MNTOPT_NOATIME; break; case ZFS_PROP_RELATIME: mntopt_on = MNTOPT_RELATIME; mntopt_off = MNTOPT_NORELATIME; break; case ZFS_PROP_DEVICES: mntopt_on = MNTOPT_DEVICES; mntopt_off = MNTOPT_NODEVICES; break; case ZFS_PROP_EXEC: mntopt_on = MNTOPT_EXEC; mntopt_off = MNTOPT_NOEXEC; break; case ZFS_PROP_READONLY: mntopt_on = MNTOPT_RO; mntopt_off = MNTOPT_RW; break; case ZFS_PROP_SETUID: mntopt_on = MNTOPT_SETUID; mntopt_off = MNTOPT_NOSETUID; break; case ZFS_PROP_XATTR: mntopt_on = MNTOPT_XATTR; mntopt_off = MNTOPT_NOXATTR; break; case ZFS_PROP_NBMAND: mntopt_on = MNTOPT_NBMAND; mntopt_off = MNTOPT_NONBMAND; break; default: break; } /* * Because looking up the mount options is potentially expensive * (iterating over all of /proc/self/mounts), we defer its * calculation until we're looking up a property which requires * its presence. */ if (!zhp->zfs_mntcheck && (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) { libzfs_handle_t *hdl = zhp->zfs_hdl; struct mnttab entry; if (libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0) zhp->zfs_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); zhp->zfs_mntcheck = B_TRUE; } if (zhp->zfs_mntopts == NULL) mnt.mnt_mntopts = (char *)""; else mnt.mnt_mntopts = zhp->zfs_mntopts; switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_RELATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_READONLY: case ZFS_PROP_SETUID: #ifndef __FreeBSD__ case ZFS_PROP_XATTR: #endif case ZFS_PROP_NBMAND: *val = getprop_uint64(zhp, prop, source); if (received) break; if (hasmntopt(&mnt, mntopt_on) && !*val) { *val = B_TRUE; if (src) *src = ZPROP_SRC_TEMPORARY; } else if (hasmntopt(&mnt, mntopt_off) && *val) { *val = B_FALSE; if (src) *src = ZPROP_SRC_TEMPORARY; } break; case ZFS_PROP_CANMOUNT: case ZFS_PROP_VOLSIZE: case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: case ZFS_PROP_FILESYSTEM_LIMIT: case ZFS_PROP_SNAPSHOT_LIMIT: case ZFS_PROP_FILESYSTEM_COUNT: case ZFS_PROP_SNAPSHOT_COUNT: *val = getprop_uint64(zhp, prop, source); if (*source == NULL) { /* not default, must be local */ *source = zhp->zfs_name; } break; case ZFS_PROP_MOUNTED: *val = (zhp->zfs_mntopts != NULL); break; case ZFS_PROP_NUMCLONES: *val = zhp->zfs_dmustats.dds_num_clones; break; case ZFS_PROP_VERSION: case ZFS_PROP_NORMALIZE: case ZFS_PROP_UTF8ONLY: case ZFS_PROP_CASE: zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) { zcmd_free_nvlists(&zc); if (prop == ZFS_PROP_VERSION && zhp->zfs_type == ZFS_TYPE_VOLUME) *val = zfs_prop_default_numeric(prop); return (-1); } if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &zplprops) != 0 || nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop), val) != 0) { zcmd_free_nvlists(&zc); return (-1); } nvlist_free(zplprops); zcmd_free_nvlists(&zc); break; case ZFS_PROP_INCONSISTENT: *val = zhp->zfs_dmustats.dds_inconsistent; break; case ZFS_PROP_REDACTED: *val = zhp->zfs_dmustats.dds_redacted; break; case ZFS_PROP_CREATETXG: /* * We can directly read createtxg property from zfs * handle for Filesystem, Snapshot and ZVOL types. */ if ((zhp->zfs_type == ZFS_TYPE_FILESYSTEM) || (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) || (zhp->zfs_type == ZFS_TYPE_VOLUME)) { *val = zhp->zfs_dmustats.dds_creation_txg; break; } zfs_fallthrough; default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: *val = getprop_uint64(zhp, prop, source); /* * If we tried to use a default value for a * readonly property, it means that it was not * present. Note this only applies to "truly" * readonly properties, not set-once properties * like volblocksize. */ if (zfs_prop_readonly(prop) && !zfs_prop_setonce(prop) && *source != NULL && (*source)[0] == '\0') { *source = NULL; return (-1); } break; case PROP_TYPE_STRING: default: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "cannot get non-numeric property")); return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "internal error"))); } } return (0); } /* * Calculate the source type, given the raw source string. */ static void get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source, char *statbuf, size_t statlen) { if (statbuf == NULL || srctype == NULL || *srctype == ZPROP_SRC_TEMPORARY) { return; } if (source == NULL) { *srctype = ZPROP_SRC_NONE; } else if (source[0] == '\0') { *srctype = ZPROP_SRC_DEFAULT; } else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) { *srctype = ZPROP_SRC_RECEIVED; } else { if (strcmp(source, zhp->zfs_name) == 0) { *srctype = ZPROP_SRC_LOCAL; } else { (void) strlcpy(statbuf, source, statlen); *srctype = ZPROP_SRC_INHERITED; } } } int zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf, size_t proplen, boolean_t literal) { zfs_prop_t prop; int err = 0; if (zhp->zfs_recvd_props == NULL) if (get_recvd_props_ioctl(zhp) != 0) return (-1); prop = zfs_name_to_prop(propname); if (prop != ZPROP_USERPROP) { uint64_t cookie; if (!nvlist_exists(zhp->zfs_recvd_props, propname)) return (-1); zfs_set_recvd_props_mode(zhp, &cookie); err = zfs_prop_get(zhp, prop, propbuf, proplen, NULL, NULL, 0, literal); zfs_unset_recvd_props_mode(zhp, &cookie); } else { nvlist_t *propval; char *recvdval; if (nvlist_lookup_nvlist(zhp->zfs_recvd_props, propname, &propval) != 0) return (-1); recvdval = fnvlist_lookup_string(propval, ZPROP_VALUE); (void) strlcpy(propbuf, recvdval, proplen); } return (err == 0 ? 0 : -1); } static int get_clones_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) { nvlist_t *value; nvpair_t *pair; value = zfs_get_clones_nvl(zhp); if (value == NULL || nvlist_empty(value)) return (-1); propbuf[0] = '\0'; for (pair = nvlist_next_nvpair(value, NULL); pair != NULL; pair = nvlist_next_nvpair(value, pair)) { if (propbuf[0] != '\0') (void) strlcat(propbuf, ",", proplen); (void) strlcat(propbuf, nvpair_name(pair), proplen); } return (0); } struct get_clones_arg { uint64_t numclones; nvlist_t *value; const char *origin; char buf[ZFS_MAX_DATASET_NAME_LEN]; }; static int get_clones_cb(zfs_handle_t *zhp, void *arg) { struct get_clones_arg *gca = arg; if (gca->numclones == 0) { zfs_close(zhp); return (0); } if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, gca->buf, sizeof (gca->buf), NULL, NULL, 0, B_TRUE) != 0) goto out; if (strcmp(gca->buf, gca->origin) == 0) { fnvlist_add_boolean(gca->value, zfs_get_name(zhp)); gca->numclones--; } out: (void) zfs_iter_children(zhp, get_clones_cb, gca); zfs_close(zhp); return (0); } nvlist_t * zfs_get_clones_nvl(zfs_handle_t *zhp) { nvlist_t *nv, *value; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES), &nv) != 0) { struct get_clones_arg gca; /* * if this is a snapshot, then the kernel wasn't able * to get the clones. Do it by slowly iterating. */ if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) return (NULL); if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0) return (NULL); if (nvlist_alloc(&value, NV_UNIQUE_NAME, 0) != 0) { nvlist_free(nv); return (NULL); } gca.numclones = zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES); gca.value = value; gca.origin = zhp->zfs_name; if (gca.numclones != 0) { zfs_handle_t *root; char pool[ZFS_MAX_DATASET_NAME_LEN]; char *cp = pool; /* get the pool name */ (void) strlcpy(pool, zhp->zfs_name, sizeof (pool)); (void) strsep(&cp, "/@"); root = zfs_open(zhp->zfs_hdl, pool, ZFS_TYPE_FILESYSTEM); if (root == NULL) { nvlist_free(nv); nvlist_free(value); return (NULL); } (void) get_clones_cb(root, &gca); } if (gca.numclones != 0 || nvlist_add_nvlist(nv, ZPROP_VALUE, value) != 0 || nvlist_add_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES), nv) != 0) { nvlist_free(nv); nvlist_free(value); return (NULL); } nvlist_free(nv); nvlist_free(value); nv = fnvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES)); } return (fnvlist_lookup_nvlist(nv, ZPROP_VALUE)); } static int get_rsnaps_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) { nvlist_t *value; uint64_t *snaps; uint_t nsnaps; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &value) != 0) return (-1); if (nvlist_lookup_uint64_array(value, ZPROP_VALUE, &snaps, &nsnaps) != 0) return (-1); if (nsnaps == 0) { /* There's no redaction snapshots; pass a special value back */ (void) snprintf(propbuf, proplen, "none"); return (0); } propbuf[0] = '\0'; for (int i = 0; i < nsnaps; i++) { char buf[128]; if (propbuf[0] != '\0') (void) strlcat(propbuf, ",", proplen); (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)snaps[i]); (void) strlcat(propbuf, buf, proplen); } return (0); } /* * Accepts a property and value and checks that the value * matches the one found by the channel program. If they are * not equal, print both of them. */ static void zcp_check(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t intval, const char *strval) { if (!zhp->zfs_hdl->libzfs_prop_debug) return; int error; char *poolname = zhp->zpool_hdl->zpool_name; const char *prop_name = zfs_prop_to_name(prop); const char *program = "args = ...\n" "ds = args['dataset']\n" "prop = args['property']\n" "value, setpoint = zfs.get_prop(ds, prop)\n" "return {value=value, setpoint=setpoint}\n"; nvlist_t *outnvl; nvlist_t *retnvl; nvlist_t *argnvl = fnvlist_alloc(); fnvlist_add_string(argnvl, "dataset", zhp->zfs_name); fnvlist_add_string(argnvl, "property", zfs_prop_to_name(prop)); error = lzc_channel_program_nosync(poolname, program, 10 * 1000 * 1000, 10 * 1024 * 1024, argnvl, &outnvl); if (error == 0) { retnvl = fnvlist_lookup_nvlist(outnvl, "return"); if (zfs_prop_get_type(prop) == PROP_TYPE_NUMBER) { int64_t ans; error = nvlist_lookup_int64(retnvl, "value", &ans); if (error != 0) { (void) fprintf(stderr, "%s: zcp check error: " "%u\n", prop_name, error); return; } if (ans != intval) { (void) fprintf(stderr, "%s: zfs found %llu, " "but zcp found %llu\n", prop_name, (u_longlong_t)intval, (u_longlong_t)ans); } } else { char *str_ans; error = nvlist_lookup_string(retnvl, "value", &str_ans); if (error != 0) { (void) fprintf(stderr, "%s: zcp check error: " "%u\n", prop_name, error); return; } if (strcmp(strval, str_ans) != 0) { (void) fprintf(stderr, "%s: zfs found '%s', but zcp found '%s'\n", prop_name, strval, str_ans); } } } else { (void) fprintf(stderr, "%s: zcp check failed, channel program " "error: %u\n", prop_name, error); } nvlist_free(argnvl); nvlist_free(outnvl); } /* * Retrieve a property from the given object. If 'literal' is specified, then * numbers are left as exact values. Otherwise, numbers are converted to a * human-readable form. * * Returns 0 on success, or -1 on error. */ int zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal) { char *source = NULL; uint64_t val; const char *str; const char *strval; boolean_t received = zfs_is_recvd_props_mode(zhp); /* * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type, B_FALSE)) return (-1); if (received && zfs_prop_readonly(prop)) return (-1); if (src) *src = ZPROP_SRC_NONE; switch (prop) { case ZFS_PROP_CREATION: /* * 'creation' is a time_t stored in the statistics. We convert * this into a string unless 'literal' is specified. */ { val = getprop_uint64(zhp, prop, &source); time_t time = (time_t)val; struct tm t; if (literal || localtime_r(&time, &t) == NULL || strftime(propbuf, proplen, "%a %b %e %k:%M %Y", &t) == 0) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_MOUNTPOINT: /* * Getting the precise mountpoint can be tricky. * * - for 'none' or 'legacy', return those values. * - for inherited mountpoints, we want to take everything * after our ancestor and append it to the inherited value. * * If the pool has an alternate root, we want to prepend that * root to any values we return. */ str = getprop_string(zhp, prop, &source); if (str[0] == '/') { char buf[MAXPATHLEN]; char *root = buf; const char *relpath; /* * If we inherit the mountpoint, even from a dataset * with a received value, the source will be the path of * the dataset we inherit from. If source is * ZPROP_SOURCE_VAL_RECVD, the received value is not * inherited. */ if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { relpath = ""; } else { relpath = zhp->zfs_name + strlen(source); if (relpath[0] == '/') relpath++; } if ((zpool_get_prop(zhp->zpool_hdl, ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL, B_FALSE)) || (strcmp(root, "-") == 0)) root[0] = '\0'; /* * Special case an alternate root of '/'. This will * avoid having multiple leading slashes in the * mountpoint path. */ if (strcmp(root, "/") == 0) root++; /* * If the mountpoint is '/' then skip over this * if we are obtaining either an alternate root or * an inherited mountpoint. */ if (str[1] == '\0' && (root[0] != '\0' || relpath[0] != '\0')) str++; if (relpath[0] == '\0') (void) snprintf(propbuf, proplen, "%s%s", root, str); else (void) snprintf(propbuf, proplen, "%s%s%s%s", root, str, relpath[0] == '@' ? "" : "/", relpath); } else { /* 'legacy' or 'none' */ (void) strlcpy(propbuf, str, proplen); } zcp_check(zhp, prop, 0, propbuf); break; case ZFS_PROP_ORIGIN: str = getprop_string(zhp, prop, &source); if (str == NULL) return (-1); (void) strlcpy(propbuf, str, proplen); zcp_check(zhp, prop, 0, str); break; case ZFS_PROP_REDACT_SNAPS: if (get_rsnaps_string(zhp, propbuf, proplen) != 0) return (-1); break; case ZFS_PROP_CLONES: if (get_clones_string(zhp, propbuf, proplen) != 0) return (-1); break; case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); /* * If quota or reservation is 0, we translate this into 'none' * (unless literal is set), and indicate that it's the default * value. Otherwise, we print the number nicely and indicate * that its set locally. */ if (val == 0) { if (literal) (void) strlcpy(propbuf, "0", proplen); else (void) strlcpy(propbuf, "none", proplen); } else { if (literal) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); else zfs_nicebytes(val, propbuf, proplen); } zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_FILESYSTEM_LIMIT: case ZFS_PROP_SNAPSHOT_LIMIT: case ZFS_PROP_FILESYSTEM_COUNT: case ZFS_PROP_SNAPSHOT_COUNT: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); /* * If limit is UINT64_MAX, we translate this into 'none', and * indicate that it's the default value. Otherwise, we print * the number nicely and indicate that it's set locally. */ if (val == UINT64_MAX) { (void) strlcpy(propbuf, "none", proplen); } else if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } else { zfs_nicenum(val, propbuf, proplen); } zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_REFRATIO: case ZFS_PROP_COMPRESSRATIO: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (literal) (void) snprintf(propbuf, proplen, "%llu.%02llu", (u_longlong_t)(val / 100), (u_longlong_t)(val % 100)); else (void) snprintf(propbuf, proplen, "%llu.%02llux", (u_longlong_t)(val / 100), (u_longlong_t)(val % 100)); zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_TYPE: switch (zhp->zfs_type) { case ZFS_TYPE_FILESYSTEM: str = "filesystem"; break; case ZFS_TYPE_VOLUME: str = "volume"; break; case ZFS_TYPE_SNAPSHOT: str = "snapshot"; break; case ZFS_TYPE_BOOKMARK: str = "bookmark"; break; default: abort(); } (void) snprintf(propbuf, proplen, "%s", str); zcp_check(zhp, prop, 0, propbuf); break; case ZFS_PROP_MOUNTED: /* * The 'mounted' property is a pseudo-property that described * whether the filesystem is currently mounted. Even though * it's a boolean value, the typical values of "on" and "off" * don't make sense, so we translate to "yes" and "no". */ if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, src, &source, &val) != 0) return (-1); if (val) (void) strlcpy(propbuf, "yes", proplen); else (void) strlcpy(propbuf, "no", proplen); break; case ZFS_PROP_NAME: /* * The 'name' property is a pseudo-property derived from the * dataset name. It is presented as a real property to simplify * consumers. */ (void) strlcpy(propbuf, zhp->zfs_name, proplen); zcp_check(zhp, prop, 0, propbuf); break; case ZFS_PROP_MLSLABEL: { #ifdef HAVE_MLSLABEL m_label_t *new_sl = NULL; char *ascii = NULL; /* human readable label */ (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); if (literal || (strcasecmp(propbuf, ZFS_MLSLABEL_DEFAULT) == 0)) break; /* * Try to translate the internal hex string to * human-readable output. If there are any * problems just use the hex string. */ if (str_to_label(propbuf, &new_sl, MAC_LABEL, L_NO_CORRECTION, NULL) == -1) { m_label_free(new_sl); break; } if (label_to_str(new_sl, &ascii, M_LABEL, DEF_NAMES) != 0) { if (ascii) free(ascii); m_label_free(new_sl); break; } m_label_free(new_sl); (void) strlcpy(propbuf, ascii, proplen); free(ascii); #else (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); #endif /* HAVE_MLSLABEL */ } break; case ZFS_PROP_GUID: case ZFS_PROP_KEY_GUID: case ZFS_PROP_IVSET_GUID: case ZFS_PROP_CREATETXG: case ZFS_PROP_OBJSETID: case ZFS_PROP_PBKDF2_ITERS: /* * These properties are stored as numbers, but they are * identifiers or counters. * We don't want them to be pretty printed, because pretty * printing truncates their values making them useless. */ if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_REFERENCED: case ZFS_PROP_AVAILABLE: case ZFS_PROP_USED: case ZFS_PROP_USEDSNAP: case ZFS_PROP_USEDDS: case ZFS_PROP_USEDREFRESERV: case ZFS_PROP_USEDCHILD: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } else { zfs_nicebytes(val, propbuf, proplen); } zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_SNAPSHOTS_CHANGED: { if ((get_numeric_property(zhp, prop, src, &source, &val) != 0) || val == 0) { return (-1); } time_t time = (time_t)val; struct tm t; if (literal || localtime_r(&time, &t) == NULL || - strftime(propbuf, proplen, "%a %b %e %k:%M %Y", + strftime(propbuf, proplen, "%a %b %e %k:%M:%S %Y", &t) == 0) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } zcp_check(zhp, prop, val, NULL); break; default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) { return (-1); } if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } else { zfs_nicenum(val, propbuf, proplen); } zcp_check(zhp, prop, val, NULL); break; case PROP_TYPE_STRING: str = getprop_string(zhp, prop, &source); if (str == NULL) return (-1); (void) strlcpy(propbuf, str, proplen); zcp_check(zhp, prop, 0, str); break; case PROP_TYPE_INDEX: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (zfs_prop_index_to_string(prop, val, &strval) != 0) return (-1); (void) strlcpy(propbuf, strval, proplen); zcp_check(zhp, prop, 0, strval); break; default: abort(); } } get_source(zhp, src, source, statbuf, statlen); return (0); } /* * Utility function to get the given numeric property. Does no validation that * the given property is the appropriate type; should only be used with * hard-coded property types. */ uint64_t zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop) { char *source; uint64_t val = 0; (void) get_numeric_property(zhp, prop, NULL, &source, &val); return (val); } static int zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val) { char buf[64]; (void) snprintf(buf, sizeof (buf), "%llu", (longlong_t)val); return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf)); } /* * Similar to zfs_prop_get(), but returns the value as an integer. */ int zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value, zprop_source_t *src, char *statbuf, size_t statlen) { char *source; /* * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type, B_FALSE)) { return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE, dgettext(TEXT_DOMAIN, "cannot get property '%s'"), zfs_prop_to_name(prop))); } if (src) *src = ZPROP_SRC_NONE; if (get_numeric_property(zhp, prop, src, &source, value) != 0) return (-1); get_source(zhp, src, source, statbuf, statlen); return (0); } #ifdef HAVE_IDMAP static int idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser, char **domainp, idmap_rid_t *ridp) { idmap_get_handle_t *get_hdl = NULL; idmap_stat status; int err = EINVAL; if (idmap_get_create(&get_hdl) != IDMAP_SUCCESS) goto out; if (isuser) { err = idmap_get_sidbyuid(get_hdl, id, IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); } else { err = idmap_get_sidbygid(get_hdl, id, IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); } if (err == IDMAP_SUCCESS && idmap_get_mappings(get_hdl) == IDMAP_SUCCESS && status == IDMAP_SUCCESS) err = 0; else err = EINVAL; out: if (get_hdl) idmap_get_destroy(get_hdl); return (err); } #endif /* HAVE_IDMAP */ /* * convert the propname into parameters needed by kernel * Eg: userquota@ahrens -> ZFS_PROP_USERQUOTA, "", 126829 * Eg: userused@matt@domain -> ZFS_PROP_USERUSED, "S-1-123-456", 789 * Eg: groupquota@staff -> ZFS_PROP_GROUPQUOTA, "", 1234 * Eg: groupused@staff -> ZFS_PROP_GROUPUSED, "", 1234 * Eg: projectquota@123 -> ZFS_PROP_PROJECTQUOTA, "", 123 * Eg: projectused@789 -> ZFS_PROP_PROJECTUSED, "", 789 */ static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp) { zfs_userquota_prop_t type; char *cp; boolean_t isuser; boolean_t isgroup; boolean_t isproject; struct passwd *pw; struct group *gr; domain[0] = '\0'; /* Figure out the property type ({user|group|project}{quota|space}) */ for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) { if (strncmp(propname, zfs_userquota_prop_prefixes[type], strlen(zfs_userquota_prop_prefixes[type])) == 0) break; } if (type == ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); *typep = type; isuser = (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_USERUSED || type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_USEROBJUSED); isgroup = (type == ZFS_PROP_GROUPQUOTA || type == ZFS_PROP_GROUPUSED || type == ZFS_PROP_GROUPOBJQUOTA || type == ZFS_PROP_GROUPOBJUSED); isproject = (type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTUSED || type == ZFS_PROP_PROJECTOBJQUOTA || type == ZFS_PROP_PROJECTOBJUSED); cp = strchr(propname, '@') + 1; if (isuser && (pw = getpwnam(cp)) != NULL) { if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); *ridp = pw->pw_uid; } else if (isgroup && (gr = getgrnam(cp)) != NULL) { if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); *ridp = gr->gr_gid; } else if (!isproject && strchr(cp, '@')) { #ifdef HAVE_IDMAP /* * It's a SID name (eg "user@domain") that needs to be * turned into S-1-domainID-RID. */ directory_error_t e; char *numericsid = NULL; char *end; if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); if (isuser) { e = directory_sid_from_user_name(NULL, cp, &numericsid); } else { e = directory_sid_from_group_name(NULL, cp, &numericsid); } if (e != NULL) { directory_error_free(e); return (ENOENT); } if (numericsid == NULL) return (ENOENT); cp = numericsid; (void) strlcpy(domain, cp, domainlen); cp = strrchr(domain, '-'); *cp = '\0'; cp++; errno = 0; *ridp = strtoull(cp, &end, 10); free(numericsid); if (errno != 0 || *end != '\0') return (EINVAL); #else (void) domainlen; return (ENOSYS); #endif /* HAVE_IDMAP */ } else { /* It's a user/group/project ID (eg "12345"). */ uid_t id; char *end; id = strtoul(cp, &end, 10); if (*end != '\0') return (EINVAL); if (id > MAXUID && !isproject) { #ifdef HAVE_IDMAP /* It's an ephemeral ID. */ idmap_rid_t rid; char *mapdomain; if (idmap_id_to_numeric_domain_rid(id, isuser, &mapdomain, &rid) != 0) return (ENOENT); (void) strlcpy(domain, mapdomain, domainlen); *ridp = rid; #else return (ENOSYS); #endif /* HAVE_IDMAP */ } else { *ridp = id; } } return (0); } static int zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue, zfs_userquota_prop_t *typep) { int err; zfs_cmd_t zc = {"\0"}; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); err = userquota_propname_decode(propname, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), typep, zc.zc_value, sizeof (zc.zc_value), &zc.zc_guid); zc.zc_objset_type = *typep; if (err) return (err); err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_USERSPACE_ONE, &zc); if (err) return (err); *propvalue = zc.zc_cookie; return (0); } int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { zfs_userquota_prop_t type; return (zfs_prop_get_userquota_common(zhp, propname, propvalue, &type)); } int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal) { int err; uint64_t propvalue; zfs_userquota_prop_t type; err = zfs_prop_get_userquota_common(zhp, propname, &propvalue, &type); if (err) return (err); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)propvalue); } else if (propvalue == 0 && (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA || type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA || type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTOBJQUOTA)) { (void) strlcpy(propbuf, "none", proplen); } else if (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA || type == ZFS_PROP_USERUSED || type == ZFS_PROP_GROUPUSED || type == ZFS_PROP_PROJECTUSED || type == ZFS_PROP_PROJECTQUOTA) { zfs_nicebytes(propvalue, propbuf, proplen); } else { zfs_nicenum(propvalue, propbuf, proplen); } return (0); } /* * propname must start with "written@" or "written#". */ int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { int err; zfs_cmd_t zc = {"\0"}; const char *snapname; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); assert(zfs_prop_written(propname)); snapname = propname + strlen("written@"); if (strchr(snapname, '@') != NULL || strchr(snapname, '#') != NULL) { /* full snapshot or bookmark name specified */ (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); } else { /* snapname is the short name, append it to zhp's fsname */ char *cp; (void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); cp = strchr(zc.zc_value, '@'); if (cp != NULL) *cp = '\0'; (void) strlcat(zc.zc_value, snapname - 1, sizeof (zc.zc_value)); } err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SPACE_WRITTEN, &zc); if (err) return (err); *propvalue = zc.zc_cookie; return (0); } int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal) { int err; uint64_t propvalue; err = zfs_prop_get_written_int(zhp, propname, &propvalue); if (err) return (err); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)propvalue); } else { zfs_nicebytes(propvalue, propbuf, proplen); } return (0); } /* * Returns the name of the given zfs handle. */ const char * zfs_get_name(const zfs_handle_t *zhp) { return (zhp->zfs_name); } /* * Returns the name of the parent pool for the given zfs handle. */ const char * zfs_get_pool_name(const zfs_handle_t *zhp) { return (zhp->zpool_hdl->zpool_name); } /* * Returns the type of the given zfs handle. */ zfs_type_t zfs_get_type(const zfs_handle_t *zhp) { return (zhp->zfs_type); } /* * Returns the type of the given zfs handle, * or, if a snapshot, the type of the snapshotted dataset. */ zfs_type_t zfs_get_underlying_type(const zfs_handle_t *zhp) { return (zhp->zfs_head_type); } /* * Is one dataset name a child dataset of another? * * Needs to handle these cases: * Dataset 1 "a/foo" "a/foo" "a/foo" "a/foo" * Dataset 2 "a/fo" "a/foobar" "a/bar/baz" "a/foo/bar" * Descendant? No. No. No. Yes. */ static boolean_t is_descendant(const char *ds1, const char *ds2) { size_t d1len = strlen(ds1); /* ds2 can't be a descendant if it's smaller */ if (strlen(ds2) < d1len) return (B_FALSE); /* otherwise, compare strings and verify that there's a '/' char */ return (ds2[d1len] == '/' && (strncmp(ds1, ds2, d1len) == 0)); } /* * Given a complete name, return just the portion that refers to the parent. * Will return -1 if there is no parent (path is just the name of the * pool). */ static int parent_name(const char *path, char *buf, size_t buflen) { char *slashp; (void) strlcpy(buf, path, buflen); if ((slashp = strrchr(buf, '/')) == NULL) return (-1); *slashp = '\0'; return (0); } int zfs_parent_name(zfs_handle_t *zhp, char *buf, size_t buflen) { return (parent_name(zfs_get_name(zhp), buf, buflen)); } /* * If accept_ancestor is false, then check to make sure that the given path has * a parent, and that it exists. If accept_ancestor is true, then find the * closest existing ancestor for the given path. In prefixlen return the * length of already existing prefix of the given path. We also fetch the * 'zoned' property, which is used to validate property settings when creating * new datasets. */ static int check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, boolean_t accept_ancestor, int *prefixlen) { zfs_cmd_t zc = {"\0"}; char parent[ZFS_MAX_DATASET_NAME_LEN]; char *slash; zfs_handle_t *zhp; char errbuf[ERRBUFLEN]; uint64_t is_zoned; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* get parent, and check to see if this is just a pool */ if (parent_name(path, parent, sizeof (parent)) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* check to see if the pool exists */ if ((slash = strchr(parent, '/')) == NULL) slash = parent + strlen(parent); (void) strncpy(zc.zc_name, parent, slash - parent); zc.zc_name[slash - parent] = '\0'; if (zfs_ioctl(hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0 && errno == ENOENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool '%s'"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* check to see if the parent dataset exists */ while ((zhp = make_dataset_handle(hdl, parent)) == NULL) { if (errno == ENOENT && accept_ancestor) { /* * Go deeper to find an ancestor, give up on top level. */ if (parent_name(parent, parent, sizeof (parent)) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool '%s'"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } } else if (errno == ENOENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent does not exist")); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } else return (zfs_standard_error(hdl, errno, errbuf)); } is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); if (zoned != NULL) *zoned = is_zoned; /* we are in a non-global zone, but parent is in the global zone */ if (getzoneid() != GLOBAL_ZONEID && !is_zoned) { (void) zfs_standard_error(hdl, EPERM, errbuf); zfs_close(zhp); return (-1); } /* make sure parent is a filesystem */ if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent is not a filesystem")); (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (-1); } zfs_close(zhp); if (prefixlen != NULL) *prefixlen = strlen(parent); return (0); } /* * Finds whether the dataset of the given type(s) exists. */ boolean_t zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types) { zfs_handle_t *zhp; if (!zfs_validate_name(hdl, path, types, B_FALSE)) return (B_FALSE); /* * Try to get stats for the dataset, which will tell us if it exists. */ if ((zhp = make_dataset_handle(hdl, path)) != NULL) { int ds_type = zhp->zfs_type; zfs_close(zhp); if (types & ds_type) return (B_TRUE); } return (B_FALSE); } /* * Given a path to 'target', create all the ancestors between * the prefixlen portion of the path, and the target itself. * Fail if the initial prefixlen-ancestor does not already exist. */ int create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) { zfs_handle_t *h; char *cp; const char *opname; /* make sure prefix exists */ cp = target + prefixlen; if (*cp != '/') { assert(strchr(cp, '/') == NULL); h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); } else { *cp = '\0'; h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); *cp = '/'; } if (h == NULL) return (-1); zfs_close(h); /* * Attempt to create, mount, and share any ancestor filesystems, * up to the prefixlen-long one. */ for (cp = target + prefixlen + 1; (cp = strchr(cp, '/')) != NULL; *cp = '/', cp++) { *cp = '\0'; h = make_dataset_handle(hdl, target); if (h) { /* it already exists, nothing to do here */ zfs_close(h); continue; } if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, NULL) != 0) { opname = dgettext(TEXT_DOMAIN, "create"); goto ancestorerr; } h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); if (h == NULL) { opname = dgettext(TEXT_DOMAIN, "open"); goto ancestorerr; } if (zfs_mount(h, NULL, 0) != 0) { opname = dgettext(TEXT_DOMAIN, "mount"); goto ancestorerr; } if (zfs_share(h, NULL) != 0) { opname = dgettext(TEXT_DOMAIN, "share"); goto ancestorerr; } zfs_close(h); } zfs_commit_shares(NULL); return (0); ancestorerr: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to %s ancestor '%s'"), opname, target); return (-1); } /* * Creates non-existing ancestors of the given path. */ int zfs_create_ancestors(libzfs_handle_t *hdl, const char *path) { int prefix; char *path_copy; char errbuf[ERRBUFLEN]; int rc = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* * Check that we are not passing the nesting limit * before we start creating any ancestors. */ if (dataset_nestcheck(path) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "maximum name nesting depth exceeded")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0) return (-1); if ((path_copy = strdup(path)) != NULL) { rc = create_parents(hdl, path_copy, prefix); free(path_copy); } if (path_copy == NULL || rc != 0) return (-1); return (0); } /* * Create a new filesystem or volume. */ int zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, nvlist_t *props) { int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); uint64_t zoned; enum lzc_dataset_type ost; zpool_handle_t *zpool_handle; uint8_t *wkeydata = NULL; uint_t wkeylen = 0; char errbuf[ERRBUFLEN]; char parent[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* validate the path, taking care to note the extended error message */ if (!zfs_validate_name(hdl, path, type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); if (dataset_nestcheck(path) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "maximum name nesting depth exceeded")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* validate parents exist */ if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0) return (-1); /* * The failure modes when creating a dataset of a different type over * one that already exists is a little strange. In particular, if you * try to create a dataset on top of an existing dataset, the ioctl() * will return ENOENT, not EEXIST. To prevent this from happening, we * first try to see if the dataset exists. */ if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset already exists")); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (type == ZFS_TYPE_VOLUME) ost = LZC_DATSET_TYPE_ZVOL; else ost = LZC_DATSET_TYPE_ZFS; /* open zpool handle for prop validation */ char pool_path[ZFS_MAX_DATASET_NAME_LEN]; (void) strlcpy(pool_path, path, sizeof (pool_path)); /* truncate pool_path at first slash */ char *p = strchr(pool_path, '/'); if (p != NULL) *p = '\0'; if ((zpool_handle = zpool_open(hdl, pool_path)) == NULL) return (-1); if (props && (props = zfs_valid_proplist(hdl, type, props, zoned, NULL, zpool_handle, B_TRUE, errbuf)) == 0) { zpool_close(zpool_handle); return (-1); } zpool_close(zpool_handle); if (type == ZFS_TYPE_VOLUME) { /* * If we are creating a volume, the size and block size must * satisfy a few restraints. First, the blocksize must be a * valid block size between SPA_{MIN,MAX}BLOCKSIZE. Second, the * volsize must be a multiple of the block size, and cannot be * zero. */ if (props == NULL || nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing volume size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } if ((ret = nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &blocksize)) != 0) { if (ret == ENOENT) { blocksize = zfs_prop_default_numeric( ZFS_PROP_VOLBLOCKSIZE); } else { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing volume block size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } if (size == 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume size cannot be zero")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } if (size % blocksize != 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume size must be a multiple of volume block " "size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } (void) parent_name(path, parent, sizeof (parent)); if (zfs_crypto_create(hdl, parent, props, NULL, B_TRUE, &wkeydata, &wkeylen) != 0) { nvlist_free(props); return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); } /* create the dataset */ ret = lzc_create(path, ost, props, wkeydata, wkeylen); nvlist_free(props); if (wkeydata != NULL) free(wkeydata); /* check for failure */ if (ret != 0) { switch (errno) { case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such parent '%s'"), parent); return (zfs_error(hdl, EZFS_NOENT, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to set this " "property or value")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); case EACCES: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "encryption root's key is not loaded " "or provided")); return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); case ERANGE: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property value(s) specified")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); #ifdef _ILP32 case EOVERFLOW: /* * This platform can't address a volume this big. */ if (type == ZFS_TYPE_VOLUME) return (zfs_error(hdl, EZFS_VOLTOOBIG, errbuf)); zfs_fallthrough; #endif default: return (zfs_standard_error(hdl, errno, errbuf)); } } return (0); } /* * Destroys the given dataset. The caller must make sure that the filesystem * isn't mounted, and that there are no active dependents. If the file system * does not exist this function does nothing. */ int zfs_destroy(zfs_handle_t *zhp, boolean_t defer) { int error; if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT && defer) return (EINVAL); if (zhp->zfs_type == ZFS_TYPE_BOOKMARK) { nvlist_t *nv = fnvlist_alloc(); fnvlist_add_boolean(nv, zhp->zfs_name); error = lzc_destroy_bookmarks(nv, NULL); fnvlist_free(nv); if (error != 0) { return (zfs_standard_error_fmt(zhp->zfs_hdl, error, dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zfs_name)); } return (0); } if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { nvlist_t *nv = fnvlist_alloc(); fnvlist_add_boolean(nv, zhp->zfs_name); error = lzc_destroy_snaps(nv, defer, NULL); fnvlist_free(nv); } else { error = lzc_destroy(zhp->zfs_name); } if (error != 0 && error != ENOENT) { return (zfs_standard_error_fmt(zhp->zfs_hdl, errno, dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zfs_name)); } remove_mountpoint(zhp); return (0); } struct destroydata { nvlist_t *nvl; const char *snapname; }; static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; if (snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, dd->snapname) >= sizeof (name)) return (EINVAL); if (lzc_exists(name)) fnvlist_add_boolean(dd->nvl, name); rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); zfs_close(zhp); return (rv); } /* * Destroys all snapshots with the given name in zhp & descendants. */ int zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) { int ret; struct destroydata dd = { 0 }; dd.snapname = snapname; dd.nvl = fnvlist_alloc(); (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); if (nvlist_empty(dd.nvl)) { ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); } else { ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer); } fnvlist_free(dd.nvl); return (ret); } /* * Destroys all the snapshots named in the nvlist. */ int zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) { nvlist_t *errlist = NULL; nvpair_t *pair; int ret = zfs_destroy_snaps_nvl_os(hdl, snaps); if (ret != 0) return (ret); ret = lzc_destroy_snaps(snaps, defer, &errlist); if (ret == 0) { nvlist_free(errlist); return (0); } if (nvlist_empty(errlist)) { char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); ret = zfs_standard_error(hdl, ret, errbuf); } for (pair = nvlist_next_nvpair(errlist, NULL); pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"), nvpair_name(pair)); switch (fnvpair_value_int32(pair)) { case EEXIST: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshot is cloned")); ret = zfs_error(hdl, EZFS_EXISTS, errbuf); break; default: ret = zfs_standard_error(hdl, errno, errbuf); break; } } nvlist_free(errlist); return (ret); } /* * Clones the given dataset. The target must be of the same type as the source. */ int zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) { char parent[ZFS_MAX_DATASET_NAME_LEN]; int ret; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; uint64_t zoned; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), target); /* validate the target/clone name */ if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents exist */ if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0) return (-1); (void) parent_name(target, parent, sizeof (parent)); /* do the clone */ if (props) { zfs_type_t type = ZFS_TYPE_FILESYSTEM; if (ZFS_IS_VOLUME(zhp)) type = ZFS_TYPE_VOLUME; if ((props = zfs_valid_proplist(hdl, type, props, zoned, zhp, zhp->zpool_hdl, B_TRUE, errbuf)) == NULL) return (-1); if (zfs_fix_auto_resv(zhp, props) == -1) { nvlist_free(props); return (-1); } } if (zfs_crypto_clone_check(hdl, zhp, parent, props) != 0) { nvlist_free(props); return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); } ret = lzc_clone(target, zhp->zfs_name, props); nvlist_free(props); if (ret != 0) { switch (errno) { case ENOENT: /* * The parent doesn't exist. We should have caught this * above, but there may a race condition that has since * destroyed the parent. * * At this point, we don't know whether it's the source * that doesn't exist anymore, or whether the target * dataset doesn't exist. */ zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "no such parent '%s'"), parent); return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); case EXDEV: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "source and target pools differ")); return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET, errbuf)); default: return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } return (ret); } /* * Promotes the given clone fs to be the clone parent. */ int zfs_promote(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; char snapname[ZFS_MAX_DATASET_NAME_LEN]; int ret; char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot promote '%s'"), zhp->zfs_name); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshots can not be promoted")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } if (zhp->zfs_dmustats.dds_origin[0] == '\0') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not a cloned filesystem")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } if (!zfs_validate_name(hdl, zhp->zfs_name, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); ret = lzc_promote(zhp->zfs_name, snapname, sizeof (snapname)); if (ret != 0) { switch (ret) { case EACCES: /* * Promoting encrypted dataset outside its * encryption root. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot promote dataset outside its " "encryption root")); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); case EEXIST: /* There is a conflicting snapshot name. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "conflicting snapshot '%s' from parent '%s'"), snapname, zhp->zfs_dmustats.dds_origin); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); default: return (zfs_standard_error(hdl, ret, errbuf)); } } return (ret); } typedef struct snapdata { nvlist_t *sd_nvl; const char *sd_snapname; } snapdata_t; static int zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) { snapdata_t *sd = arg; char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) == 0) { if (snprintf(name, sizeof (name), "%s@%s", zfs_get_name(zhp), sd->sd_snapname) >= sizeof (name)) return (EINVAL); fnvlist_add_boolean(sd->sd_nvl, name); rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); } zfs_close(zhp); return (rv); } /* * Creates snapshots. The keys in the snaps nvlist are the snapshots to be * created. */ int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) { int ret; char errbuf[ERRBUFLEN]; nvpair_t *elem; nvlist_t *errors; zpool_handle_t *zpool_hdl; char pool[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshots ")); elem = NULL; while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) { const char *snapname = nvpair_name(elem); /* validate the target name */ if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT, B_TRUE)) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshot '%s'"), snapname); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } } /* * get pool handle for prop validation. assumes all snaps are in the * same pool, as does lzc_snapshot (below). */ elem = nvlist_next_nvpair(snaps, NULL); (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); pool[strcspn(pool, "/@")] = '\0'; zpool_hdl = zpool_open(hdl, pool); if (zpool_hdl == NULL) return (-1); if (props != NULL && (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, props, B_FALSE, NULL, zpool_hdl, B_FALSE, errbuf)) == NULL) { zpool_close(zpool_hdl); return (-1); } zpool_close(zpool_hdl); ret = lzc_snapshot(snaps, props, &errors); if (ret != 0) { boolean_t printed = B_FALSE; for (elem = nvlist_next_nvpair(errors, NULL); elem != NULL; elem = nvlist_next_nvpair(errors, elem)) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshot '%s'"), nvpair_name(elem)); (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); printed = B_TRUE; } if (!printed) { switch (ret) { case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "multiple snapshots of same " "fs not allowed")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); break; default: (void) zfs_standard_error(hdl, ret, errbuf); } } } nvlist_free(props); nvlist_free(errors); return (ret); } int zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, nvlist_t *props) { int ret; snapdata_t sd = { 0 }; char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *cp; zfs_handle_t *zhp; char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot snapshot %s"), path); if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); (void) strlcpy(fsname, path, sizeof (fsname)); cp = strchr(fsname, '@'); *cp = '\0'; sd.sd_snapname = cp + 1; if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { return (-1); } sd.sd_nvl = fnvlist_alloc(); if (recursive) { (void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd); } else { fnvlist_add_boolean(sd.sd_nvl, path); } ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props); fnvlist_free(sd.sd_nvl); zfs_close(zhp); return (ret); } /* * Destroy any more recent snapshots. We invoke this callback on any dependents * of the snapshot first. If the 'cb_dependent' member is non-zero, then this * is a dependent and we should just destroy it without checking the transaction * group. */ typedef struct rollback_data { const char *cb_target; /* the snapshot */ uint64_t cb_create; /* creation time reference */ boolean_t cb_error; boolean_t cb_force; } rollback_data_t; static int rollback_destroy_dependent(zfs_handle_t *zhp, void *data) { rollback_data_t *cbp = data; prop_changelist_t *clp; /* We must destroy this clone; first unmount it */ clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, cbp->cb_force ? MS_FORCE: 0); if (clp == NULL || changelist_prefix(clp) != 0) { cbp->cb_error = B_TRUE; zfs_close(zhp); return (0); } if (zfs_destroy(zhp, B_FALSE) != 0) cbp->cb_error = B_TRUE; else changelist_remove(clp, zhp->zfs_name); (void) changelist_postfix(clp); changelist_free(clp); zfs_close(zhp); return (0); } static int rollback_destroy(zfs_handle_t *zhp, void *data) { rollback_data_t *cbp = data; if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, rollback_destroy_dependent, cbp); cbp->cb_error |= zfs_destroy(zhp, B_FALSE); } zfs_close(zhp); return (0); } /* * Given a dataset, rollback to a specific snapshot, discarding any * data changes since then and making it the active dataset. * * Any snapshots and bookmarks more recent than the target are * destroyed, along with their dependents (i.e. clones). */ int zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) { rollback_data_t cb = { 0 }; int err; boolean_t restore_resv = 0; uint64_t old_volsize = 0, new_volsize; zfs_prop_t resv_prop = { 0 }; uint64_t min_txg = 0; assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM || zhp->zfs_type == ZFS_TYPE_VOLUME); /* * Destroy all recent snapshots and their dependents. */ cb.cb_force = force; cb.cb_target = snap->zfs_name; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); if (cb.cb_create > 0) min_txg = cb.cb_create; (void) zfs_iter_snapshots(zhp, B_FALSE, rollback_destroy, &cb, min_txg, 0); (void) zfs_iter_bookmarks(zhp, rollback_destroy, &cb); if (cb.cb_error) return (-1); /* * Now that we have verified that the snapshot is the latest, * rollback to the given snapshot. */ if (zhp->zfs_type == ZFS_TYPE_VOLUME) { if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); restore_resv = (old_volsize == zfs_prop_get_int(zhp, resv_prop)); } /* * Pass both the filesystem and the wanted snapshot names, * we would get an error back if the snapshot is destroyed or * a new snapshot is created before this request is processed. */ err = lzc_rollback_to(zhp->zfs_name, snap->zfs_name); if (err != 0) { char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rollback '%s'"), zhp->zfs_name); switch (err) { case EEXIST: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "there is a snapshot or bookmark more recent " "than '%s'"), snap->zfs_name); (void) zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf); break; case ESRCH: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "'%s' is not found among snapshots of '%s'"), snap->zfs_name, zhp->zfs_name); (void) zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf); break; case EINVAL: (void) zfs_error(zhp->zfs_hdl, EZFS_BADTYPE, errbuf); break; default: (void) zfs_standard_error(zhp->zfs_hdl, err, errbuf); } return (err); } /* * For volumes, if the pre-rollback volsize matched the pre- * rollback reservation and the volsize has changed then set * the reservation property to the post-rollback volsize. * Make a new handle since the rollback closed the dataset. */ if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { if (restore_resv) { new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (old_volsize != new_volsize) err = zfs_prop_set_int(zhp, resv_prop, new_volsize); } zfs_close(zhp); } return (err); } /* * Renames the given dataset. */ int zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags) { int ret = 0; zfs_cmd_t zc = {"\0"}; char *delim; prop_changelist_t *cl = NULL; char parent[ZFS_MAX_DATASET_NAME_LEN]; char property[ZFS_MAXPROPLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; /* if we have the same exact name, just return success */ if (strcmp(zhp->zfs_name, target) == 0) return (0); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename to '%s'"), target); /* make sure source name is valid */ if (!zfs_validate_name(hdl, zhp->zfs_name, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* * Make sure the target name is valid */ if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { if ((strchr(target, '@') == NULL) || *target == '@') { /* * Snapshot target name is abbreviated, * reconstruct full dataset name */ (void) strlcpy(parent, zhp->zfs_name, sizeof (parent)); delim = strchr(parent, '@'); if (strchr(target, '@') == NULL) *(++delim) = '\0'; else *delim = '\0'; (void) strlcat(parent, target, sizeof (parent)); target = parent; } else { /* * Make sure we're renaming within the same dataset. */ delim = strchr(target, '@'); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '@') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshots must be part of same " "dataset")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } } if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } else { if (flags.recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "recursive rename must be a snapshot")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents */ if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0) return (-1); /* make sure we're in the same pool */ verify((delim = strchr(target, '/')) != NULL); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '/') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "datasets must be within same pool")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } /* new name cannot be a child of the current dataset name */ if (is_descendant(zhp->zfs_name, target)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "New dataset name cannot be a descendant of " "current dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } } (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name); if (getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is used in a non-global zone")); return (zfs_error(hdl, EZFS_ZONED, errbuf)); } /* * Avoid unmounting file systems with mountpoint property set to * 'legacy' or 'none' even if -u option is not given. */ if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && !flags.recursive && !flags.nounmount && zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, property, sizeof (property), NULL, NULL, 0, B_FALSE) == 0 && (strcmp(property, "legacy") == 0 || strcmp(property, "none") == 0)) { flags.nounmount = B_TRUE; } if (flags.recursive) { char *parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); delim = strchr(parentname, '@'); *delim = '\0'; zfs_handle_t *zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); free(parentname); if (zhrp == NULL) { ret = -1; goto error; } zfs_close(zhrp); } else if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, flags.nounmount ? CL_GATHER_DONT_UNMOUNT : CL_GATHER_ITER_MOUNTED, flags.forceunmount ? MS_FORCE : 0)) == NULL) return (-1); if (changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); (void) zfs_error(hdl, EZFS_ZONED, errbuf); ret = -1; goto error; } if ((ret = changelist_prefix(cl)) != 0) goto error; } if (ZFS_IS_VOLUME(zhp)) zc.zc_objset_type = DMU_OST_ZVOL; else zc.zc_objset_type = DMU_OST_ZFS; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); zc.zc_cookie = !!flags.recursive; zc.zc_cookie |= (!!flags.nounmount) << 1; if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) { /* * if it was recursive, the one that actually failed will * be in zc.zc_name */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zc.zc_name); if (flags.recursive && errno == EEXIST) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "a child dataset already has a snapshot " "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); } else if (errno == EACCES) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot move encrypted child outside of " "its encryption root")); (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); } /* * On failure, we still want to remount any filesystems that * were previously mounted, so we don't alter the system state. */ if (cl != NULL) (void) changelist_postfix(cl); } else { if (cl != NULL) { changelist_rename(cl, zfs_get_name(zhp), target); ret = changelist_postfix(cl); } } error: if (cl != NULL) { changelist_free(cl); } return (ret); } nvlist_t * zfs_get_all_props(zfs_handle_t *zhp) { return (zhp->zfs_props); } nvlist_t * zfs_get_recvd_props(zfs_handle_t *zhp) { if (zhp->zfs_recvd_props == NULL) if (get_recvd_props_ioctl(zhp) != 0) return (NULL); return (zhp->zfs_recvd_props); } nvlist_t * zfs_get_user_props(zfs_handle_t *zhp) { return (zhp->zfs_user_props); } /* * This function is used by 'zfs list' to determine the exact set of columns to * display, and their maximum widths. This does two main things: * * - If this is a list of all properties, then expand the list to include * all native properties, and set a flag so that for each dataset we look * for new unique user properties and add them to the list. * * - For non fixed-width properties, keep track of the maximum width seen * so that we can size the column appropriately. If the user has * requested received property values, we also need to compute the width * of the RECEIVED column. */ int zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received, boolean_t literal) { libzfs_handle_t *hdl = zhp->zfs_hdl; zprop_list_t *entry; zprop_list_t **last, **start; nvlist_t *userprops, *propval; nvpair_t *elem; char *strval; char buf[ZFS_MAXPROPLEN]; if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0) return (-1); userprops = zfs_get_user_props(zhp); entry = *plp; if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) { /* * Go through and add any user properties as necessary. We * start by incrementing our list pointer to the first * non-native property. */ start = plp; while (*start != NULL) { if ((*start)->pl_prop == ZPROP_USERPROP) break; start = &(*start)->pl_next; } elem = NULL; while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) { /* * See if we've already found this property in our list. */ for (last = start; *last != NULL; last = &(*last)->pl_next) { if (strcmp((*last)->pl_user_prop, nvpair_name(elem)) == 0) break; } if (*last == NULL) { entry = zfs_alloc(hdl, sizeof (zprop_list_t)); entry->pl_user_prop = zfs_strdup(hdl, nvpair_name(elem)); entry->pl_prop = ZPROP_USERPROP; entry->pl_width = strlen(nvpair_name(elem)); entry->pl_all = B_TRUE; *last = entry; } } } /* * Now go through and check the width of any non-fixed columns */ for (entry = *plp; entry != NULL; entry = entry->pl_next) { if (entry->pl_fixed && !literal) continue; if (entry->pl_prop != ZPROP_USERPROP) { if (zfs_prop_get(zhp, entry->pl_prop, buf, sizeof (buf), NULL, NULL, 0, literal) == 0) { if (strlen(buf) > entry->pl_width) entry->pl_width = strlen(buf); } if (received && zfs_prop_get_recvd(zhp, zfs_prop_to_name(entry->pl_prop), buf, sizeof (buf), literal) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } else { if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop, &propval) == 0) { strval = fnvlist_lookup_string(propval, ZPROP_VALUE); if (strlen(strval) > entry->pl_width) entry->pl_width = strlen(strval); } if (received && zfs_prop_get_recvd(zhp, entry->pl_user_prop, buf, sizeof (buf), literal) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } } return (0); } void zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) { nvpair_t *curr; nvpair_t *next; /* * Keep a reference to the props-table against which we prune the * properties. */ zhp->zfs_props_table = props; curr = nvlist_next_nvpair(zhp->zfs_props, NULL); while (curr) { zfs_prop_t zfs_prop = zfs_name_to_prop(nvpair_name(curr)); next = nvlist_next_nvpair(zhp->zfs_props, curr); /* * User properties will result in ZPROP_USERPROP (an alias * for ZPROP_INVAL), and since we * only know how to prune standard ZFS properties, we always * leave these in the list. This can also happen if we * encounter an unknown DSL property (when running older * software, for example). */ if (zfs_prop != ZPROP_USERPROP && props[zfs_prop] == B_FALSE) (void) nvlist_remove(zhp->zfs_props, nvpair_name(curr), nvpair_type(curr)); curr = next; } } static int zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path, zfs_smb_acl_op_t cmd, char *resource1, char *resource2) { zfs_cmd_t zc = {"\0"}; nvlist_t *nvlist = NULL; int error; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value)); zc.zc_cookie = (uint64_t)cmd; if (cmd == ZFS_SMB_ACL_RENAME) { if (nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (0); } } switch (cmd) { case ZFS_SMB_ACL_ADD: case ZFS_SMB_ACL_REMOVE: (void) strlcpy(zc.zc_string, resource1, sizeof (zc.zc_string)); break; case ZFS_SMB_ACL_RENAME: if (nvlist_add_string(nvlist, ZFS_SMB_ACL_SRC, resource1) != 0) { (void) no_memory(hdl); return (-1); } if (nvlist_add_string(nvlist, ZFS_SMB_ACL_TARGET, resource2) != 0) { (void) no_memory(hdl); return (-1); } zcmd_write_src_nvlist(hdl, &zc, nvlist); break; case ZFS_SMB_ACL_PURGE: break; default: return (-1); } error = ioctl(hdl->libzfs_fd, ZFS_IOC_SMB_ACL, &zc); nvlist_free(nvlist); return (error); } int zfs_smb_acl_add(libzfs_handle_t *hdl, char *dataset, char *path, char *resource) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_ADD, resource, NULL)); } int zfs_smb_acl_remove(libzfs_handle_t *hdl, char *dataset, char *path, char *resource) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_REMOVE, resource, NULL)); } int zfs_smb_acl_purge(libzfs_handle_t *hdl, char *dataset, char *path) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_PURGE, NULL, NULL)); } int zfs_smb_acl_rename(libzfs_handle_t *hdl, char *dataset, char *path, char *oldname, char *newname) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_RENAME, oldname, newname)); } int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, zfs_userspace_cb_t func, void *arg) { zfs_cmd_t zc = {"\0"}; zfs_useracct_t buf[100]; libzfs_handle_t *hdl = zhp->zfs_hdl; int ret; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_objset_type = type; zc.zc_nvlist_dst = (uintptr_t)buf; for (;;) { zfs_useracct_t *zua = buf; zc.zc_nvlist_dst_size = sizeof (buf); if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) { if ((errno == ENOTSUP && (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED || type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA || type == ZFS_PROP_PROJECTOBJUSED || type == ZFS_PROP_PROJECTOBJQUOTA || type == ZFS_PROP_PROJECTUSED || type == ZFS_PROP_PROJECTQUOTA))) break; return (zfs_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get used/quota for %s"), zc.zc_name)); } if (zc.zc_nvlist_dst_size == 0) break; while (zc.zc_nvlist_dst_size > 0) { if ((ret = func(arg, zua->zu_domain, zua->zu_rid, zua->zu_space)) != 0) return (ret); zua++; zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t); } } return (0); } struct holdarg { nvlist_t *nvl; const char *snapname; const char *tag; boolean_t recursive; int error; }; static int zfs_hold_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; if (snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname) >= sizeof (name)) return (EINVAL); if (lzc_exists(name)) fnvlist_add_string(ha->nvl, name, ha->tag); if (ha->recursive) rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); zfs_close(zhp); return (rv); } int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive, int cleanup_fd) { int ret; struct holdarg ha; ha.nvl = fnvlist_alloc(); ha.snapname = snapname; ha.tag = tag; ha.recursive = recursive; (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); if (nvlist_empty(ha.nvl)) { char errbuf[ERRBUFLEN]; fnvlist_free(ha.nvl); ret = ENOENT; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold snapshot '%s@%s'"), zhp->zfs_name, snapname); (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf); return (ret); } ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl); fnvlist_free(ha.nvl); return (ret); } int zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds) { int ret; nvlist_t *errors; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; nvpair_t *elem; errors = NULL; ret = lzc_hold(holds, cleanup_fd, &errors); if (ret == 0) { /* There may be errors even in the success case. */ fnvlist_free(errors); return (0); } if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold")); switch (ret) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); break; default: (void) zfs_standard_error(hdl, ret, errbuf); } } for (elem = nvlist_next_nvpair(errors, NULL); elem != NULL; elem = nvlist_next_nvpair(errors, elem)) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold snapshot '%s'"), nvpair_name(elem)); switch (fnvpair_value_int32(elem)) { case E2BIG: /* * Temporary tags wind up having the ds object id * prepended. So even if we passed the length check * above, it's still possible for the tag to wind * up being slightly too long. */ (void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf); break; case EINVAL: (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case EEXIST: (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); break; default: (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); } } fnvlist_free(errors); return (ret); } static int zfs_release_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; nvlist_t *existing_holds; if (snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname) >= sizeof (name)) { ha->error = EINVAL; rv = EINVAL; } if (lzc_get_holds(name, &existing_holds) != 0) { ha->error = ENOENT; } else if (!nvlist_exists(existing_holds, ha->tag)) { ha->error = ESRCH; } else { nvlist_t *torelease = fnvlist_alloc(); fnvlist_add_boolean(torelease, ha->tag); fnvlist_add_nvlist(ha->nvl, name, torelease); fnvlist_free(torelease); } if (ha->recursive) rv = zfs_iter_filesystems(zhp, zfs_release_one, ha); zfs_close(zhp); return (rv); } int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) { int ret; struct holdarg ha; nvlist_t *errors = NULL; nvpair_t *elem; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; ha.nvl = fnvlist_alloc(); ha.snapname = snapname; ha.tag = tag; ha.recursive = recursive; ha.error = 0; (void) zfs_release_one(zfs_handle_dup(zhp), &ha); if (nvlist_empty(ha.nvl)) { fnvlist_free(ha.nvl); ret = ha.error; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release hold from snapshot '%s@%s'"), zhp->zfs_name, snapname); if (ret == ESRCH) { (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); } else { (void) zfs_standard_error(hdl, ret, errbuf); } return (ret); } ret = lzc_release(ha.nvl, &errors); fnvlist_free(ha.nvl); if (ret == 0) { /* There may be errors even in the success case. */ fnvlist_free(errors); return (0); } if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release")); switch (errno) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; default: (void) zfs_standard_error(hdl, errno, errbuf); } } for (elem = nvlist_next_nvpair(errors, NULL); elem != NULL; elem = nvlist_next_nvpair(errors, elem)) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release hold from snapshot '%s'"), nvpair_name(elem)); switch (fnvpair_value_int32(elem)) { case ESRCH: (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); break; case EINVAL: (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); break; default: (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); } } fnvlist_free(errors); return (ret); } int zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; int err = 0; char errbuf[ERRBUFLEN]; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); tryagain: nvbuf = malloc(nvsz); if (nvbuf == NULL) { err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); goto out; } zc.zc_nvlist_dst_size = nvsz; zc.zc_nvlist_dst = (uintptr_t)nvbuf; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); switch (errno) { case ENOMEM: free(nvbuf); nvsz = zc.zc_nvlist_dst_size; goto tryagain; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error(hdl, errno, errbuf); break; } } else { /* success */ int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); if (rc) { err = zfs_standard_error_fmt(hdl, rc, dgettext( TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); } } free(nvbuf); out: return (err); } int zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; char *nvbuf; char errbuf[ERRBUFLEN]; size_t nvsz; int err; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); assert(err == 0); nvbuf = malloc(nvsz); err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); assert(err == 0); zc.zc_nvlist_src_size = nvsz; zc.zc_nvlist_src = (uintptr_t)nvbuf; zc.zc_perm_action = un; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"), zc.zc_name); switch (errno) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error(hdl, errno, errbuf); break; } } free(nvbuf); return (err); } int zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) { int err; char errbuf[ERRBUFLEN]; err = lzc_get_holds(zhp->zfs_name, nvl); if (err != 0) { libzfs_handle_t *hdl = zhp->zfs_hdl; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), zhp->zfs_name); switch (err) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error(hdl, errno, errbuf); break; } } return (err); } /* * The theory of raidz space accounting * * The "referenced" property of RAIDZ vdevs is scaled such that a 128KB block * will "reference" 128KB, even though it allocates more than that, to store the * parity information (and perhaps skip sectors). This concept of the * "referenced" (and other DMU space accounting) being lower than the allocated * space by a constant factor is called "raidz deflation." * * As mentioned above, the constant factor for raidz deflation assumes a 128KB * block size. However, zvols typically have a much smaller block size (default * 8KB). These smaller blocks may require proportionally much more parity * information (and perhaps skip sectors). In this case, the change to the * "referenced" property may be much more than the logical block size. * * Suppose a raidz vdev has 5 disks with ashift=12. A 128k block may be written * as follows. * * +-------+-------+-------+-------+-------+ * | disk1 | disk2 | disk3 | disk4 | disk5 | * +-------+-------+-------+-------+-------+ * | P0 | D0 | D8 | D16 | D24 | * | P1 | D1 | D9 | D17 | D25 | * | P2 | D2 | D10 | D18 | D26 | * | P3 | D3 | D11 | D19 | D27 | * | P4 | D4 | D12 | D20 | D28 | * | P5 | D5 | D13 | D21 | D29 | * | P6 | D6 | D14 | D22 | D30 | * | P7 | D7 | D15 | D23 | D31 | * +-------+-------+-------+-------+-------+ * * Above, notice that 160k was allocated: 8 x 4k parity sectors + 32 x 4k data * sectors. The dataset's referenced will increase by 128k and the pool's * allocated and free properties will be adjusted by 160k. * * A 4k block written to the same raidz vdev will require two 4k sectors. The * blank cells represent unallocated space. * * +-------+-------+-------+-------+-------+ * | disk1 | disk2 | disk3 | disk4 | disk5 | * +-------+-------+-------+-------+-------+ * | P0 | D0 | | | | * +-------+-------+-------+-------+-------+ * * Above, notice that the 4k block required one sector for parity and another * for data. vdev_raidz_asize() will return 8k and as such the pool's allocated * and free properties will be adjusted by 8k. The dataset will not be charged * 8k. Rather, it will be charged a value that is scaled according to the * overhead of the 128k block on the same vdev. This 8k allocation will be * charged 8k * 128k / 160k. 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as * calculated in the 128k block example above. * * Every raidz allocation is sized to be a multiple of nparity+1 sectors. That * is, every raidz1 allocation will be a multiple of 2 sectors, raidz2 * allocations are a multiple of 3 sectors, and raidz3 allocations are a * multiple of of 4 sectors. When a block does not fill the required number of * sectors, skip blocks (sectors) are used. * * An 8k block being written to a raidz vdev may be written as follows: * * +-------+-------+-------+-------+-------+ * | disk1 | disk2 | disk3 | disk4 | disk5 | * +-------+-------+-------+-------+-------+ * | P0 | D0 | D1 | S0 | | * +-------+-------+-------+-------+-------+ * * In order to maintain the nparity+1 allocation size, a skip block (S0) was * added. For this 8k block, the pool's allocated and free properties are * adjusted by 16k and the dataset's referenced is increased by 16k * 128k / * 160k. Again, 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as calculated in * the 128k block example above. * * The situation is slightly different for dRAID since the minimum allocation * size is the full group width. The same 8K block above would be written as * follows in a dRAID group: * * +-------+-------+-------+-------+-------+ * | disk1 | disk2 | disk3 | disk4 | disk5 | * +-------+-------+-------+-------+-------+ * | P0 | D0 | D1 | S0 | S1 | * +-------+-------+-------+-------+-------+ * * Compression may lead to a variety of block sizes being written for the same * volume or file. There is no clear way to reserve just the amount of space * that will be required, so the worst case (no compression) is assumed. * Note that metadata blocks will typically be compressed, so the reservation * size returned by zvol_volsize_to_reservation() will generally be slightly * larger than the maximum that the volume can reference. */ /* * Derived from function of same name in module/zfs/vdev_raidz.c. Returns the * amount of space (in bytes) that will be allocated for the specified block * size. Note that the "referenced" space accounted will be less than this, but * not necessarily equal to "blksize", due to RAIDZ deflation. */ static uint64_t vdev_raidz_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift, uint64_t blksize) { uint64_t asize, ndata; ASSERT3U(ndisks, >, nparity); ndata = ndisks - nparity; asize = ((blksize - 1) >> ashift) + 1; asize += nparity * ((asize + ndata - 1) / ndata); asize = roundup(asize, nparity + 1) << ashift; return (asize); } /* * Derived from function of same name in module/zfs/vdev_draid.c. Returns the * amount of space (in bytes) that will be allocated for the specified block * size. */ static uint64_t vdev_draid_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift, uint64_t blksize) { ASSERT3U(ndisks, >, nparity); uint64_t ndata = ndisks - nparity; uint64_t rows = ((blksize - 1) / (ndata << ashift)) + 1; uint64_t asize = (rows * ndisks) << ashift; return (asize); } /* * Determine how much space will be allocated if it lands on the most space- * inefficient top-level vdev. Returns the size in bytes required to store one * copy of the volume data. See theory comment above. */ static uint64_t volsize_from_vdevs(zpool_handle_t *zhp, uint64_t nblocks, uint64_t blksize) { nvlist_t *config, *tree, **vdevs; uint_t nvdevs; uint64_t ret = 0; config = zpool_get_config(zhp, NULL); if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 || nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &vdevs, &nvdevs) != 0) { return (nblocks * blksize); } for (int v = 0; v < nvdevs; v++) { char *type; uint64_t nparity, ashift, asize, tsize; uint64_t volsize; if (nvlist_lookup_string(vdevs[v], ZPOOL_CONFIG_TYPE, &type) != 0) continue; if (strcmp(type, VDEV_TYPE_RAIDZ) != 0 && strcmp(type, VDEV_TYPE_DRAID) != 0) continue; if (nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_NPARITY, &nparity) != 0) continue; if (nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_ASHIFT, &ashift) != 0) continue; if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { nvlist_t **disks; uint_t ndisks; if (nvlist_lookup_nvlist_array(vdevs[v], ZPOOL_CONFIG_CHILDREN, &disks, &ndisks) != 0) continue; /* allocation size for the "typical" 128k block */ tsize = vdev_raidz_asize(ndisks, nparity, ashift, SPA_OLD_MAXBLOCKSIZE); /* allocation size for the blksize block */ asize = vdev_raidz_asize(ndisks, nparity, ashift, blksize); } else { uint64_t ndata; if (nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_DRAID_NDATA, &ndata) != 0) continue; /* allocation size for the "typical" 128k block */ tsize = vdev_draid_asize(ndata + nparity, nparity, ashift, SPA_OLD_MAXBLOCKSIZE); /* allocation size for the blksize block */ asize = vdev_draid_asize(ndata + nparity, nparity, ashift, blksize); } /* * Scale this size down as a ratio of 128k / tsize. * See theory statement above. */ volsize = nblocks * asize * SPA_OLD_MAXBLOCKSIZE / tsize; if (volsize > ret) { ret = volsize; } } if (ret == 0) { ret = nblocks * blksize; } return (ret); } /* * Convert the zvol's volume size to an appropriate reservation. See theory * comment above. * * Note: If this routine is updated, it is necessary to update the ZFS test * suite's shell version in reservation.shlib. */ uint64_t zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize, nvlist_t *props) { uint64_t numdb; uint64_t nblocks, volblocksize; int ncopies; char *strval; if (nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0) ncopies = atoi(strval); else ncopies = 1; if (nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) volblocksize = ZVOL_DEFAULT_BLOCKSIZE; nblocks = volsize / volblocksize; /* * Metadata defaults to using 128k blocks, not volblocksize blocks. For * this reason, only the data blocks are scaled based on vdev config. */ volsize = volsize_from_vdevs(zph, nblocks, volblocksize); /* start with metadnode L0-L6 */ numdb = 7; /* calculate number of indirects */ while (nblocks > 1) { nblocks += DNODES_PER_LEVEL - 1; nblocks /= DNODES_PER_LEVEL; numdb += nblocks; } numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1); volsize *= ncopies; /* * this is exactly DN_MAX_INDBLKSHIFT when metadata isn't * compressed, but in practice they compress down to about * 1100 bytes */ numdb *= 1ULL << DN_MAX_INDBLKSHIFT; volsize += numdb; return (volsize); } /* * Wait for the given activity and return the status of the wait (whether or not * any waiting was done) in the 'waited' parameter. Non-existent fses are * reported via the 'missing' parameter, rather than by printing an error * message. This is convenient when this function is called in a loop over a * long period of time (as it is, for example, by zfs's wait cmd). In that * scenario, a fs being exported or destroyed should be considered a normal * event, so we don't want to print an error when we find that the fs doesn't * exist. */ int zfs_wait_status(zfs_handle_t *zhp, zfs_wait_activity_t activity, boolean_t *missing, boolean_t *waited) { int error = lzc_wait_fs(zhp->zfs_name, activity, waited); *missing = (error == ENOENT); if (*missing) return (0); if (error != 0) { (void) zfs_standard_error_fmt(zhp->zfs_hdl, error, dgettext(TEXT_DOMAIN, "error waiting in fs '%s'"), zhp->zfs_name); } return (error); } diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7 index 0e656fee8d51..93a7bfcc865f 100644 --- a/sys/contrib/openzfs/man/man7/zfsprops.7 +++ b/sys/contrib/openzfs/man/man7/zfsprops.7 @@ -1,2084 +1,2087 @@ .\" .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. .\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or https://opensource.org/licenses/CDDL-1.0. .\" See the License for the specific language governing permissions .\" and limitations under the License. .\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. .\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" CDDL HEADER END .\" .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2011 Joshua M. Clulow .\" Copyright (c) 2011, 2019 by Delphix. All rights reserved. .\" Copyright (c) 2011, Pawel Jakub Dawidek .\" Copyright (c) 2012, Glen Barber .\" Copyright (c) 2012, Bryan Drewery .\" Copyright (c) 2013, Steven Hartland .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. .\" Copyright (c) 2014, Joyent, Inc. All rights reserved. .\" Copyright (c) 2014 by Adam Stevko. All rights reserved. .\" Copyright (c) 2014 Integros [integros.com] .\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved. .\" Copyright (c) 2014, Xin LI .\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved. .\" Copyright 2019 Richard Laager. All rights reserved. .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" Copyright (c) 2019, Kjeld Schouten-Lebbing .\" .Dd May 24, 2021 .Dt ZFSPROPS 7 .Os . .Sh NAME .Nm zfsprops .Nd native and user-defined properties of ZFS datasets . .Sh DESCRIPTION Properties are divided into two types, native properties and user-defined .Po or .Qq user .Pc properties. Native properties either export internal statistics or control ZFS behavior. In addition, native properties are either editable or read-only. User properties have no effect on ZFS behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the .Sx User Properties section, below. . .Ss Native Properties Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets .Pq file systems, volumes, or snapshots . .Pp The values of numeric properties can be specified using human-readable suffixes .Po for example, .Sy k , .Sy KB , .Sy M , .Sy Gb , and so forth, up to .Sy Z for zettabyte .Pc . The following are all valid .Pq and equal specifications: .Li 1536M, 1.5g, 1.50GB . .Pp The values of non-numeric properties are case sensitive and must be lowercase, except for .Sy mountpoint , .Sy sharenfs , and .Sy sharesmb . .Pp The following native properties consist of read-only statistics about the dataset. These properties can be neither set, nor inherited. Native properties apply to all dataset types unless otherwise noted. .Bl -tag -width "usedbyrefreservation" .It Sy available The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets within the pool. .Pp This property can also be referred to by its shortened column name, .Sy avail . .It Sy compressratio For non-snapshots, the compression ratio achieved for the .Sy used space of this dataset, expressed as a multiplier. The .Sy used property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the .Sy compressratio is the same as the .Sy refcompressratio property. Compression can be turned on by running: .Nm zfs Cm set Sy compression Ns = Ns Sy on Ar dataset . The default value is .Sy off . .It Sy createtxg The transaction group (txg) in which the dataset was created. Bookmarks have the same .Sy createtxg as the snapshot they are initially tied to. This property is suitable for ordering a list of snapshots, e.g. for incremental send and receive. .It Sy creation The time this dataset was created. .It Sy clones For snapshots, this property is a comma-separated list of filesystems or volumes which are clones of this snapshot. The clones' .Sy origin property is this snapshot. If the .Sy clones property is not empty, then this snapshot can not be destroyed .Po even with the .Fl r or .Fl f options .Pc . The roles of origin and clone can be swapped by promoting the clone with the .Nm zfs Cm promote command. .It Sy defer_destroy This property is .Sy on if the snapshot has been marked for deferred destroy by using the .Nm zfs Cm destroy Fl d command. Otherwise, the property is .Sy off . .It Sy encryptionroot For encrypted datasets, indicates where the dataset is currently inheriting its encryption key from. Loading or unloading a key for the .Sy encryptionroot will implicitly load / unload the key for any inheriting datasets (see .Nm zfs Cm load-key and .Nm zfs Cm unload-key for details). Clones will always share an encryption key with their origin. See the .Sx Encryption section of .Xr zfs-load-key 8 for details. .It Sy filesystem_count The total number of filesystems and volumes that exist under this location in the dataset tree. This value is only available when a .Sy filesystem_limit has been set somewhere in the tree under which the dataset resides. .It Sy keystatus Indicates if an encryption key is currently loaded into ZFS. The possible values are .Sy none , .Sy available , and .Sy unavailable . See .Nm zfs Cm load-key and .Nm zfs Cm unload-key . .It Sy guid The 64 bit GUID of this dataset or bookmark which does not change over its entire lifetime. When a snapshot is sent to another pool, the received snapshot has the same GUID. Thus, the .Sy guid is suitable to identify a snapshot across pools. .It Sy logicalreferenced The amount of space that is .Qq logically accessible by this dataset. See the .Sy referenced property. The logical space ignores the effect of the .Sy compression and .Sy copies properties, giving a quantity closer to the amount of data that applications see. However, it does include space consumed by metadata. .Pp This property can also be referred to by its shortened column name, .Sy lrefer . .It Sy logicalused The amount of space that is .Qq logically consumed by this dataset and all its descendents. See the .Sy used property. The logical space ignores the effect of the .Sy compression and .Sy copies properties, giving a quantity closer to the amount of data that applications see. However, it does include space consumed by metadata. .Pp This property can also be referred to by its shortened column name, .Sy lused . .It Sy mounted For file systems, indicates whether the file system is currently mounted. This property can be either .Sy yes or .Sy no . .It Sy objsetid A unique identifier for this dataset within the pool. Unlike the dataset's .Sy guid , No the Sy objsetid of a dataset is not transferred to other pools when the snapshot is copied with a send/receive operation. The .Sy objsetid can be reused (for a new dataset) after the dataset is deleted. .It Sy origin For cloned file systems or volumes, the snapshot from which the clone was created. See also the .Sy clones property. .It Sy receive_resume_token For filesystems or volumes which have saved partially-completed state from .Nm zfs Cm receive Fl s , this opaque token can be provided to .Nm zfs Cm send Fl t to resume and complete the .Nm zfs Cm receive . .It Sy redact_snaps For bookmarks, this is the list of snapshot guids the bookmark contains a redaction list for. For snapshots, this is the list of snapshot guids the snapshot is redacted with respect to. .It Sy referenced The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are identical. .Pp This property can also be referred to by its shortened column name, .Sy refer . .It Sy refcompressratio The compression ratio achieved for the .Sy referenced space of this dataset, expressed as a multiplier. See also the .Sy compressratio property. .It Sy snapshot_count The total number of snapshots that exist under this location in the dataset tree. This value is only available when a .Sy snapshot_limit has been set somewhere in the tree under which the dataset resides. .It Sy type The type of dataset: .Sy filesystem , .Sy volume , .Sy snapshot , or .Sy bookmark . .It Sy used The amount of space consumed by this dataset and all its descendents. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendent datasets. The amount of space that a dataset consumes from its parent, as well as the amount of space that is freed if this dataset is recursively destroyed, is the greater of its space used and its reservation. .Pp The used space of a snapshot .Po see the .Sx Snapshots section of .Xr zfsconcepts 7 .Pc is space that is referenced exclusively by this snapshot. If this snapshot is destroyed, the amount of .Sy used space will be freed. Space that is shared by multiple snapshots isn't accounted for in this metric. When a snapshot is destroyed, space that was previously shared with this snapshot can become unique to snapshots adjacent to it, thus changing the used space of those snapshots. The used space of the latest snapshot can also be affected by changes in the file system. Note that the .Sy used space of a snapshot is a subset of the .Sy written space of the snapshot. .Pp The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using .Xr fsync 2 or .Sy O_SYNC does not necessarily guarantee that the space usage information is updated immediately. .It Sy usedby* The .Sy usedby* properties decompose the .Sy used properties into the various reasons that space is used. Specifically, .Sy used No = .Sy usedbychildren No + .Sy usedbydataset No + .Sy usedbyrefreservation No + .Sy usedbysnapshots . These properties are only available for datasets created on .Nm zpool .Qo version 13 Qc pools. .It Sy usedbychildren The amount of space used by children of this dataset, which would be freed if all the dataset's children were destroyed. .It Sy usedbydataset The amount of space used by this dataset itself, which would be freed if the dataset were destroyed .Po after first removing any .Sy refreservation and destroying any necessary snapshots or descendents .Pc . .It Sy usedbyrefreservation The amount of space used by a .Sy refreservation set on this dataset, which would be freed if the .Sy refreservation was removed. .It Sy usedbysnapshots The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' .Sy used properties because space can be shared by multiple snapshots. .It Sy userused Ns @ Ns Ar user The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by .Nm ls Fl l . The amount of space charged is displayed by .Nm du No and Nm ls Fl s . See the .Nm zfs Cm userspace command for more information. .Pp Unprivileged users can access only their own space usage. The root user, or a user who has been granted the .Sy userused privilege with .Nm zfs Cm allow , can access everyone's usage. .Pp The .Sy userused Ns @ Ns Ar … properties are not displayed by .Nm zfs Cm get Sy all . The user's name must be appended after the .Sy @ symbol, using one of the following forms: .Bl -bullet -compact -offset 4n .It POSIX name .Pq Qq joe .It POSIX numeric ID .Pq Qq 789 .It SID name .Pq Qq joe.smith@mydomain .It SID numeric ID .Pq Qq S-1-123-456-789 .El .Pp Files created on Linux always have POSIX owners. .It Sy userobjused Ns @ Ns Ar user The .Sy userobjused property is similar to .Sy userused but instead it counts the number of objects consumed by a user. This property counts all objects allocated on behalf of the user, it may differ from the results of system tools such as .Nm df Fl i . .Pp When the property .Sy xattr Ns = Ns Sy on is set on a file system additional objects will be created per-file to store extended attributes. These additional objects are reflected in the .Sy userobjused value and are counted against the user's .Sy userobjquota . When a file system is configured to use .Sy xattr Ns = Ns Sy sa no additional internal objects are normally required. .It Sy userrefs This property is set to the number of user holds on this snapshot. User holds are set by using the .Nm zfs Cm hold command. .It Sy groupused Ns @ Ns Ar group The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by .Nm ls Fl l . See the .Sy userused Ns @ Ns Ar user property for more information. .Pp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the .Sy groupused privilege with .Nm zfs Cm allow , can access all groups' usage. .It Sy groupobjused Ns @ Ns Ar group The number of objects consumed by the specified group in this dataset. Multiple objects may be charged to the group for each file when extended attributes are in use. See the .Sy userobjused Ns @ Ns Ar user property for more information. .Pp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the .Sy groupobjused privilege with .Nm zfs Cm allow , can access all groups' usage. .It Sy projectused Ns @ Ns Ar project The amount of space consumed by the specified project in this dataset. Project is identified via the project identifier (ID) that is object-based numeral attribute. An object can inherit the project ID from its parent object (if the parent has the flag of inherit project ID that can be set and changed via .Nm chattr Fl /+P or .Nm zfs project Fl s ) when being created. The privileged user can set and change object's project ID via .Nm chattr Fl p or .Nm zfs project Fl s anytime. Space is charged to the project of each file, as displayed by .Nm lsattr Fl p or .Nm zfs project . See the .Sy userused Ns @ Ns Ar user property for more information. .Pp The root user, or a user who has been granted the .Sy projectused privilege with .Nm zfs allow , can access all projects' usage. .It Sy projectobjused Ns @ Ns Ar project The .Sy projectobjused is similar to .Sy projectused but instead it counts the number of objects consumed by project. When the property .Sy xattr Ns = Ns Sy on is set on a fileset, ZFS will create additional objects per-file to store extended attributes. These additional objects are reflected in the .Sy projectobjused value and are counted against the project's .Sy projectobjquota . When a filesystem is configured to use .Sy xattr Ns = Ns Sy sa no additional internal objects are required. See the .Sy userobjused Ns @ Ns Ar user property for more information. .Pp The root user, or a user who has been granted the .Sy projectobjused privilege with .Nm zfs allow , can access all projects' objects usage. .It Sy snapshots_changed Provides a mechanism to quickly determine whether snapshot list has changed without having to mount a dataset or iterate the snapshot list. Specifies the time at which a snapshot for a dataset was last created or deleted. .Pp This allows us to be more efficient how often we query snapshots. +The property is persistent across mount and unmount operations only if the +.Sy extensible_dataset +feature is enabled. .It Sy volblocksize For volumes, specifies the block size of the volume. The .Sy blocksize cannot be changed once the volume has been written, so it should be set at volume creation time. The default .Sy blocksize for volumes is 16 Kbytes. Any power of 2 from 512 bytes to 128 Kbytes is valid. .Pp This property can also be referred to by its shortened column name, .Sy volblock . .It Sy written The amount of space .Sy referenced by this dataset, that was written since the previous snapshot .Pq i.e. that is not referenced by the previous snapshot . .It Sy written Ns @ Ns Ar snapshot The amount of .Sy referenced space written to this dataset since the specified snapshot. This is the space that is referenced by this dataset but was not referenced by the specified snapshot. .Pp The .Ar snapshot may be specified as a short snapshot name .Pq just the part after the Sy @ , in which case it will be interpreted as a snapshot in the same filesystem as this dataset. The .Ar snapshot may be a full snapshot name .Pq Ar filesystem Ns @ Ns Ar snapshot , which for clones may be a snapshot in the origin's filesystem .Pq or the origin of the origin's filesystem, etc. .El .Pp The following native properties can be used to change the behavior of a ZFS dataset. .Bl -tag -width "" .It Xo .Sy aclinherit Ns = Ns Sy discard Ns | Ns Sy noallow Ns | Ns .Sy restricted Ns | Ns Sy passthrough Ns | Ns Sy passthrough-x .Xc Controls how ACEs are inherited when files and directories are created. .Bl -tag -compact -offset 4n -width "passthrough-x" .It Sy discard does not inherit any ACEs. .It Sy noallow only inherits inheritable ACEs that specify .Qq deny permissions. .It Sy restricted default, removes the .Sy write_acl and .Sy write_owner permissions when the ACE is inherited. .It Sy passthrough inherits all inheritable ACEs without any modifications. .It Sy passthrough-x same meaning as .Sy passthrough , except that the .Sy owner@ , group@ , No and Sy everyone@ ACEs inherit the execute permission only if the file creation mode also requests the execute bit. .El .Pp When the property value is set to .Sy passthrough , files are created with a mode determined by the inheritable ACEs. If no inheritable ACEs exist that affect the mode, then the mode is set in accordance to the requested mode from the application. .Pp The .Sy aclinherit property does not apply to POSIX ACLs. .It Xo .Sy aclmode Ns = Ns Sy discard Ns | Ns Sy groupmask Ns | Ns .Sy passthrough Ns | Ns Sy restricted Ns .Xc Controls how an ACL is modified during chmod(2) and how inherited ACEs are modified by the file creation mode: .Bl -tag -compact -offset 4n -width "passthrough" .It Sy discard default, deletes all .Sy ACEs except for those representing the mode of the file or directory requested by .Xr chmod 2 . .It Sy groupmask reduces permissions granted in all .Sy ALLOW entries found in the .Sy ACL such that they are no greater than the group permissions specified by .Xr chmod 2 . .It Sy passthrough indicates that no changes are made to the ACL other than creating or updating the necessary ACL entries to represent the new mode of the file or directory. .It Sy restricted will cause the .Xr chmod 2 operation to return an error when used on any file or directory which has a non-trivial ACL whose entries can not be represented by a mode. .Xr chmod 2 is required to change the set user ID, set group ID, or sticky bits on a file or directory, as they do not have equivalent ACL entries. In order to use .Xr chmod 2 on a file or directory with a non-trivial ACL when .Sy aclmode is set to .Sy restricted , you must first remove all ACL entries which do not represent the current mode. .El .It Sy acltype Ns = Ns Sy off Ns | Ns Sy nfsv4 Ns | Ns Sy posix Controls whether ACLs are enabled and if so what type of ACL to use. When this property is set to a type of ACL not supported by the current platform, the behavior is the same as if it were set to .Sy off . .Bl -tag -compact -offset 4n -width "posixacl" .It Sy off default on Linux, when a file system has the .Sy acltype property set to off then ACLs are disabled. .It Sy noacl an alias for .Sy off .It Sy nfsv4 default on .Fx , indicates that NFSv4-style ZFS ACLs should be used. These ACLs can be managed with the .Xr getfacl 1 and .Xr setfacl 1 . The .Sy nfsv4 ZFS ACL type is not yet supported on Linux. .It Sy posix indicates POSIX ACLs should be used. POSIX ACLs are specific to Linux and are not functional on other platforms. POSIX ACLs are stored as an extended attribute and therefore will not overwrite any existing NFSv4 ACLs which may be set. .It Sy posixacl an alias for .Sy posix .El .Pp To obtain the best performance when setting .Sy posix users are strongly encouraged to set the .Sy xattr Ns = Ns Sy sa property. This will result in the POSIX ACL being stored more efficiently on disk. But as a consequence, all new extended attributes will only be accessible from OpenZFS implementations which support the .Sy xattr Ns = Ns Sy sa property. See the .Sy xattr property for more details. .It Sy atime Ns = Ns Sy on Ns | Ns Sy off Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The values .Sy on and .Sy off are equivalent to the .Sy atime and .Sy noatime mount options. The default value is .Sy on . See also .Sy relatime below. .It Sy canmount Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy noauto If this property is set to .Sy off , the file system cannot be mounted, and is ignored by .Nm zfs Cm mount Fl a . Setting this property to .Sy off is similar to setting the .Sy mountpoint property to .Sy none , except that the dataset still has a normal .Sy mountpoint property, which can be inherited. Setting this property to .Sy off allows datasets to be used solely as a mechanism to inherit properties. One example of setting .Sy canmount Ns = Ns Sy off is to have two datasets with the same .Sy mountpoint , so that the children of both datasets appear in the same directory, but might have different inherited characteristics. .Pp When set to .Sy noauto , a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the .Nm zfs Cm mount Fl a command or unmounted by the .Nm zfs Cm unmount Fl a command. .Pp This property is not inherited. .It Xo .Sy checksum Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy fletcher2 Ns | Ns .Sy fletcher4 Ns | Ns Sy sha256 Ns | Ns Sy noparity Ns | Ns .Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr Ns | Ns Sy blake3 .Xc Controls the checksum used to verify data integrity. The default value is .Sy on , which automatically selects an appropriate algorithm .Po currently, .Sy fletcher4 , but this may change in future releases .Pc . The value .Sy off disables integrity checking on user data. The value .Sy noparity not only disables integrity but also disables maintaining parity for user data. This setting is used internally by a dump device residing on a RAID-Z pool and should not be used by any other dataset. Disabling checksums is .Em NOT a recommended practice. .Pp The .Sy sha512 , .Sy skein , .Sy edonr , and .Sy blake3 checksum algorithms require enabling the appropriate features on the pool. .Pp Please see .Xr zpool-features 7 for more information on these algorithms. .Pp Changing this property affects only newly-written data. .It Xo .Sy compression Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy gzip Ns | Ns .Sy gzip- Ns Ar N Ns | Ns Sy lz4 Ns | Ns Sy lzjb Ns | Ns Sy zle Ns | Ns Sy zstd Ns | Ns .Sy zstd- Ns Ar N Ns | Ns Sy zstd-fast Ns | Ns Sy zstd-fast- Ns Ar N .Xc Controls the compression algorithm used for this dataset. .Pp When set to .Sy on (the default), indicates that the current default compression algorithm should be used. The default balances compression and decompression speed, with compression ratio and is expected to work well on a wide variety of workloads. Unlike all other settings for this property, .Sy on does not select a fixed compression type. As new compression algorithms are added to ZFS and enabled on a pool, the default compression algorithm may change. The current default compression algorithm is either .Sy lzjb or, if the .Sy lz4_compress feature is enabled, .Sy lz4 . .Pp The .Sy lz4 compression algorithm is a high-performance replacement for the .Sy lzjb algorithm. It features significantly faster compression and decompression, as well as a moderately higher compression ratio than .Sy lzjb , but can only be used on pools with the .Sy lz4_compress feature set to .Sy enabled . See .Xr zpool-features 7 for details on ZFS feature flags and the .Sy lz4_compress feature. .Pp The .Sy lzjb compression algorithm is optimized for performance while providing decent data compression. .Pp The .Sy gzip compression algorithm uses the same compression as the .Xr gzip 1 command. You can specify the .Sy gzip level by using the value .Sy gzip- Ns Ar N , where .Ar N is an integer from 1 .Pq fastest to 9 .Pq best compression ratio . Currently, .Sy gzip is equivalent to .Sy gzip-6 .Po which is also the default for .Xr gzip 1 .Pc . .Pp The .Sy zstd compression algorithm provides both high compression ratios and good performance. You can specify the .Sy zstd level by using the value .Sy zstd- Ns Ar N , where .Ar N is an integer from 1 .Pq fastest to 19 .Pq best compression ratio . .Sy zstd is equivalent to .Sy zstd-3 . .Pp Faster speeds at the cost of the compression ratio can be requested by setting a negative .Sy zstd level. This is done using .Sy zstd-fast- Ns Ar N , where .Ar N is an integer in .Bq Sy 1 Ns - Ns Sy 10 , 20 , 30 , No … , Sy 100 , 500 , 1000 which maps to a negative .Sy zstd level. The lower the level the faster the compression \(em .Sy 1000 provides the fastest compression and lowest compression ratio. .Sy zstd-fast is equivalent to .Sy zstd-fast- Ns Ar 1 . .Pp The .Sy zle compression algorithm compresses runs of zeros. .Pp This property can also be referred to by its shortened column name .Sy compress . Changing this property affects only newly-written data. .Pp When any setting except .Sy off is selected, compression will explicitly check for blocks consisting of only zeroes (the NUL byte). When a zero-filled block is detected, it is stored as a hole and not compressed using the indicated compression algorithm. .Pp Any block being compressed must be no larger than 7/8 of its original size after compression, otherwise the compression will not be considered worthwhile and the block saved uncompressed. Note that when the logical block is less than 8 times the disk sector size this effectively reduces the necessary compression ratio; for example, 8 KiB blocks on disks with 4 KiB disk sectors must compress to 1/2 or less of their original size. .It Xo .Sy context Ns = Ns Sy none Ns | Ns .Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level .Xc This flag sets the SELinux context for all files in the file system under a mount point for that file system. See .Xr selinux 8 for more information. .It Xo .Sy fscontext Ns = Ns Sy none Ns | Ns .Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level .Xc This flag sets the SELinux context for the file system file system being mounted. See .Xr selinux 8 for more information. .It Xo .Sy defcontext Ns = Ns Sy none Ns | Ns .Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level .Xc This flag sets the SELinux default context for unlabeled files. See .Xr selinux 8 for more information. .It Xo .Sy rootcontext Ns = Ns Sy none Ns | Ns .Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level .Xc This flag sets the SELinux context for the root inode of the file system. See .Xr selinux 8 for more information. .It Sy copies Ns = Ns Sy 1 Ns | Ns Sy 2 Ns | Ns Sy 3 Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the .Sy used property and counting against quotas and reservations. .Pp Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the .Fl o Sy copies Ns = Ns Ar N option. .Pp Remember that ZFS will not import a pool with a missing top-level vdev. Do .Em NOT create, for example a two-disk striped pool and set .Sy copies Ns = Ns Ar 2 on some datasets thinking you have setup redundancy for them. When a disk fails you will not be able to import the pool and will have lost all of your data. .Pp Encrypted datasets may not have .Sy copies Ns = Ns Ar 3 since the implementation stores some encryption metadata where the third copy would normally be. .It Sy devices Ns = Ns Sy on Ns | Ns Sy off Controls whether device nodes can be opened on this file system. The default value is .Sy on . The values .Sy on and .Sy off are equivalent to the .Sy dev and .Sy nodev mount options. .It Xo .Sy dedup Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy verify Ns | Ns .Sy sha256 Ns Oo , Ns Sy verify Oc Ns | Ns Sy sha512 Ns Oo , Ns Sy verify Oc Ns | Ns Sy skein Ns Oo , Ns Sy verify Oc Ns | Ns .Sy edonr , Ns Sy verify Ns | Ns Sy blake3 Ns Oo , Ns Sy verify Oc Ns .Xc Configures deduplication for a dataset. The default value is .Sy off . The default deduplication checksum is .Sy sha256 (this may change in the future). When .Sy dedup is enabled, the checksum defined here overrides the .Sy checksum property. Setting the value to .Sy verify has the same effect as the setting .Sy sha256 , Ns Sy verify . .Pp If set to .Sy verify , ZFS will do a byte-to-byte comparison in case of two blocks having the same signature to make sure the block contents are identical. Specifying .Sy verify is mandatory for the .Sy edonr algorithm. .Pp Unless necessary, deduplication should .Em not be enabled on a system. See the .Sx Deduplication section of .Xr zfsconcepts 7 . .It Xo .Sy dnodesize Ns = Ns Sy legacy Ns | Ns Sy auto Ns | Ns Sy 1k Ns | Ns .Sy 2k Ns | Ns Sy 4k Ns | Ns Sy 8k Ns | Ns Sy 16k .Xc Specifies a compatibility mode or literal value for the size of dnodes in the file system. The default value is .Sy legacy . Setting this property to a value other than .Sy legacy No requires the Sy large_dnode No pool feature to be enabled. .Pp Consider setting .Sy dnodesize to .Sy auto if the dataset uses the .Sy xattr Ns = Ns Sy sa property setting and the workload makes heavy use of extended attributes. This may be applicable to SELinux-enabled systems, Lustre servers, and Samba servers, for example. Literal values are supported for cases where the optimal size is known in advance and for performance testing. .Pp Leave .Sy dnodesize set to .Sy legacy if you need to receive a send stream of this dataset on a pool that doesn't enable the .Sy large_dnode feature, or if you need to import this pool on a system that doesn't support the .Sy large_dnode No feature. .Pp This property can also be referred to by its shortened column name, .Sy dnsize . .It Xo .Sy encryption Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy aes-128-ccm Ns | Ns .Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns .Sy aes-192-gcm Ns | Ns Sy aes-256-gcm .Xc Controls the encryption cipher suite (block cipher, key length, and mode) used for this dataset. Requires the .Sy encryption feature to be enabled on the pool. Requires a .Sy keyformat to be set at dataset creation time. .Pp Selecting .Sy encryption Ns = Ns Sy on when creating a dataset indicates that the default encryption suite will be selected, which is currently .Sy aes-256-gcm . In order to provide consistent data protection, encryption must be specified at dataset creation time and it cannot be changed afterwards. .Pp For more details and caveats about encryption see the .Sx Encryption section of .Xr zfs-load-key 8 . .It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase Controls what format the user's encryption key will be provided as. This property is only set when the dataset is encrypted. .Pp Raw keys and hex keys must be 32 bytes long (regardless of the chosen encryption suite) and must be randomly generated. A raw key can be generated with the following command: .Dl # Nm dd Sy if=/dev/urandom bs=32 count=1 Sy of= Ns Pa /path/to/output/key .Pp Passphrases must be between 8 and 512 bytes long and will be processed through PBKDF2 before being used (see the .Sy pbkdf2iters property). Even though the encryption suite cannot be changed after dataset creation, the keyformat can be with .Nm zfs Cm change-key . .It Xo .Sy keylocation Ns = Ns Sy prompt Ns | Ns Sy file:// Ns Ar /absolute/file/path Ns | Ns Sy https:// Ns Ar address Ns | Ns Sy http:// Ns Ar address .Xc Controls where the user's encryption key will be loaded from by default for commands such as .Nm zfs Cm load-key and .Nm zfs Cm mount Fl l . This property is only set for encrypted datasets which are encryption roots. If unspecified, the default is .Sy prompt . .Pp Even though the encryption suite cannot be changed after dataset creation, the keylocation can be with either .Nm zfs Cm set or .Nm zfs Cm change-key . If .Sy prompt is selected ZFS will ask for the key at the command prompt when it is required to access the encrypted data (see .Nm zfs Cm load-key for details). This setting will also allow the key to be passed in via the standard input stream, but users should be careful not to place keys which should be kept secret on the command line. If a file URI is selected, the key will be loaded from the specified absolute file path. If an HTTPS or HTTP URL is selected, it will be GETted using .Xr fetch 3 , libcurl, or nothing, depending on compile-time configuration and run-time availability. The .Sy SSL_CA_CERT_FILE environment variable can be set to set the location of the concatenated certificate store. The .Sy SSL_CA_CERT_PATH environment variable can be set to override the location of the directory containing the certificate authority bundle. The .Sy SSL_CLIENT_CERT_FILE and .Sy SSL_CLIENT_KEY_FILE environment variables can be set to configure the path to the client certificate and its key. .It Sy pbkdf2iters Ns = Ns Ar iterations Controls the number of PBKDF2 iterations that a .Sy passphrase encryption key should be run through when processing it into an encryption key. This property is only defined when encryption is enabled and a keyformat of .Sy passphrase is selected. The goal of PBKDF2 is to significantly increase the computational difficulty needed to brute force a user's passphrase. This is accomplished by forcing the attacker to run each passphrase through a computationally expensive hashing function many times before they arrive at the resulting key. A user who actually knows the passphrase will only have to pay this cost once. As CPUs become better at processing, this number should be raised to ensure that a brute force attack is still not possible. The current default is .Sy 350000 and the minimum is .Sy 100000 . This property may be changed with .Nm zfs Cm change-key . .It Sy exec Ns = Ns Sy on Ns | Ns Sy off Controls whether processes can be executed from within this file system. The default value is .Sy on . The values .Sy on and .Sy off are equivalent to the .Sy exec and .Sy noexec mount options. .It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none Limits the number of filesystems and volumes that can exist under this point in the dataset tree. The limit is not enforced if the user is allowed to change the limit. Setting a .Sy filesystem_limit to .Sy on a descendent of a filesystem that already has a .Sy filesystem_limit does not override the ancestor's .Sy filesystem_limit , but rather imposes an additional limit. This feature must be enabled to be used .Po see .Xr zpool-features 7 .Pc . .It Sy special_small_blocks Ns = Ns Ar size This value represents the threshold block size for including small file blocks into the special allocation class. Blocks smaller than or equal to this value will be assigned to the special allocation class while greater blocks will be assigned to the regular class. Valid values are zero or a power of two from 512 up to 1048576 (1 MiB). The default size is 0 which means no small file blocks will be allocated in the special class. .Pp Before setting this property, a special class vdev must be added to the pool. See .Xr zpoolconcepts 7 for more details on the special allocation class. .It Sy mountpoint Ns = Ns Pa path Ns | Ns Sy none Ns | Ns Sy legacy Controls the mount point used for this file system. See the .Sx Mount Points section of .Xr zfsconcepts 7 for more information on how this property is used. .Pp When the .Sy mountpoint property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is .Sy legacy , then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was previously .Sy legacy or .Sy none , or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location. .It Sy nbmand Ns = Ns Sy on Ns | Ns Sy off Controls whether the file system should be mounted with .Sy nbmand .Pq Non-blocking mandatory locks . This is used for SMB clients. Changes to this property only take effect when the file system is umounted and remounted. Support for these locks is scarce and not described by POSIX. .It Sy overlay Ns = Ns Sy on Ns | Ns Sy off Allow mounting on a busy directory or a directory which already contains files or directories. This is the default mount behavior for Linux and .Fx file systems. On these platforms the property is .Sy on by default. Set to .Sy off to disable overlay mounts for consistency with OpenZFS on other platforms. .It Sy primarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata Controls what is cached in the primary cache .Pq ARC . If this property is set to .Sy all , then both user data and metadata is cached. If this property is set to .Sy none , then neither user data nor metadata is cached. If this property is set to .Sy metadata , then only metadata is cached. The default value is .Sy all . .It Sy quota Ns = Ns Ar size Ns | Ns Sy none Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already has a quota does not override the ancestor's quota, but rather imposes an additional limit. .Pp Quotas cannot be set on volumes, as the .Sy volsize property acts as an implicit quota. .It Sy snapshot_limit Ns = Ns Ar count Ns | Ns Sy none Limits the number of snapshots that can be created on a dataset and its descendents. Setting a .Sy snapshot_limit on a descendent of a dataset that already has a .Sy snapshot_limit does not override the ancestor's .Sy snapshot_limit , but rather imposes an additional limit. The limit is not enforced if the user is allowed to change the limit. For example, this means that recursive snapshots taken from the global zone are counted against each delegated dataset within a zone. This feature must be enabled to be used .Po see .Xr zpool-features 7 .Pc . .It Sy userquota@ Ns Ar user Ns = Ns Ar size Ns | Ns Sy none Limits the amount of space consumed by the specified user. User space consumption is identified by the .Sy userspace@ Ns Ar user property. .Pp Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the .Er EDQUOT error message. See the .Nm zfs Cm userspace command for more information. .Pp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the .Sy userquota privilege with .Nm zfs Cm allow , can get and set everyone's quota. .Pp This property is not available on volumes, on file systems before version 4, or on pools before version 15. The .Sy userquota@ Ns Ar … properties are not displayed by .Nm zfs Cm get Sy all . The user's name must be appended after the .Sy @ symbol, using one of the following forms: .Bl -bullet -compact -offset 4n .It POSIX name .Pq Qq joe .It POSIX numeric ID .Pq Qq 789 .It SID name .Pq Qq joe.smith@mydomain .It SID numeric ID .Pq Qq S-1-123-456-789 .El .Pp Files created on Linux always have POSIX owners. .It Sy userobjquota@ Ns Ar user Ns = Ns Ar size Ns | Ns Sy none The .Sy userobjquota is similar to .Sy userquota but it limits the number of objects a user can create. Please refer to .Sy userobjused for more information about how objects are counted. .It Sy groupquota@ Ns Ar group Ns = Ns Ar size Ns | Ns Sy none Limits the amount of space consumed by the specified group. Group space consumption is identified by the .Sy groupused@ Ns Ar group property. .Pp Unprivileged users can access only their own groups' space usage. The root user, or a user who has been granted the .Sy groupquota privilege with .Nm zfs Cm allow , can get and set all groups' quotas. .It Sy groupobjquota@ Ns Ar group Ns = Ns Ar size Ns | Ns Sy none The .Sy groupobjquota is similar to .Sy groupquota but it limits number of objects a group can consume. Please refer to .Sy userobjused for more information about how objects are counted. .It Sy projectquota@ Ns Ar project Ns = Ns Ar size Ns | Ns Sy none Limits the amount of space consumed by the specified project. Project space consumption is identified by the .Sy projectused@ Ns Ar project property. Please refer to .Sy projectused for more information about how project is identified and set/changed. .Pp The root user, or a user who has been granted the .Sy projectquota privilege with .Nm zfs allow , can access all projects' quota. .It Sy projectobjquota@ Ns Ar project Ns = Ns Ar size Ns | Ns Sy none The .Sy projectobjquota is similar to .Sy projectquota but it limits number of objects a project can consume. Please refer to .Sy userobjused for more information about how objects are counted. .It Sy readonly Ns = Ns Sy on Ns | Ns Sy off Controls whether this dataset can be modified. The default value is .Sy off . The values .Sy on and .Sy off are equivalent to the .Sy ro and .Sy rw mount options. .Pp This property can also be referred to by its shortened column name, .Sy rdonly . .It Sy recordsize Ns = Ns Ar size Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. ZFS automatically tunes block sizes according to internal algorithms optimized for typical access patterns. .Pp For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a .Sy recordsize greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance. .Pp The size specified must be a power of two greater than or equal to .Ar 512 B and less than or equal to .Ar 128 KiB . If the .Sy large_blocks feature is enabled on the pool, the size may be up to .Ar 1 MiB . See .Xr zpool-features 7 for details on ZFS feature flags. .Pp Changing the file system's .Sy recordsize affects only files created afterward; existing files are unaffected. .Pp This property can also be referred to by its shortened column name, .Sy recsize . .It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most Controls what types of metadata are stored redundantly. ZFS stores an extra copy of metadata, so that if a single block is corrupted, the amount of user data lost is limited. This extra copy is in addition to any redundancy provided at the pool level .Pq e.g. by mirroring or RAID-Z , and is in addition to an extra copy specified by the .Sy copies property .Pq up to a total of 3 copies . For example if the pool is mirrored, .Sy copies Ns = Ns 2 , and .Sy redundant_metadata Ns = Ns Sy most , then ZFS stores 6 copies of most metadata, and 4 copies of data and some metadata. .Pp When set to .Sy all , ZFS stores an extra copy of all metadata. If a single on-disk block is corrupt, at worst a single block of user data .Po which is .Sy recordsize bytes long .Pc can be lost. .Pp When set to .Sy most , ZFS stores an extra copy of most types of metadata. This can improve performance of random writes, because less metadata must be written. In practice, at worst about 100 blocks .Po of .Sy recordsize bytes each .Pc of user data can be lost if a single on-disk block is corrupt. The exact behavior of which metadata blocks are stored redundantly may change in future releases. .Pp The default value is .Sy all . .It Sy refquota Ns = Ns Ar size Ns | Ns Sy none Limits the amount of space a dataset can consume. This property enforces a hard limit on the amount of space used. This hard limit does not include space used by descendents, including file systems and snapshots. .It Sy refreservation Ns = Ns Ar size Ns | Ns Sy none Ns | Ns Sy auto The minimum amount of space guaranteed to a dataset, not including its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by .Sy refreservation . The .Sy refreservation reservation is accounted for in the parent datasets' space used, and counts against the parent datasets' quotas and reservations. .Pp If .Sy refreservation is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of .Qq referenced bytes in the dataset. .Pp If .Sy refreservation is set to .Sy auto , a volume is thick provisioned .Po or .Qq not sparse .Pc . .Sy refreservation Ns = Ns Sy auto is only supported on volumes. See .Sy volsize in the .Sx Native Properties section for more information about sparse volumes. .Pp This property can also be referred to by its shortened column name, .Sy refreserv . .It Sy relatime Ns = Ns Sy on Ns | Ns Sy off Controls the manner in which the access time is updated when .Sy atime Ns = Ns Sy on is set. Turning this property on causes the access time to be updated relative to the modify or change time. Access time is only updated if the previous access time was earlier than the current modify or change time or if the existing access time hasn't been updated within the past 24 hours. The default value is -.Sy off . +.Sy on . The values .Sy on and .Sy off are equivalent to the .Sy relatime and .Sy norelatime mount options. .It Sy reservation Ns = Ns Ar size Ns | Ns Sy none The minimum amount of space guaranteed to a dataset and its descendants. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space used, and count against the parent datasets' quotas and reservations. .Pp This property can also be referred to by its shortened column name, .Sy reserv . .It Sy secondarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata Controls what is cached in the secondary cache .Pq L2ARC . If this property is set to .Sy all , then both user data and metadata is cached. If this property is set to .Sy none , then neither user data nor metadata is cached. If this property is set to .Sy metadata , then only metadata is cached. The default value is .Sy all . .It Sy setuid Ns = Ns Sy on Ns | Ns Sy off Controls whether the setuid bit is respected for the file system. The default value is .Sy on . The values .Sy on and .Sy off are equivalent to the .Sy suid and .Sy nosuid mount options. .It Sy sharesmb Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Ar opts Controls whether the file system is shared by using .Sy Samba USERSHARES and what options are to be used. Otherwise, the file system is automatically shared and unshared with the .Nm zfs Cm share and .Nm zfs Cm unshare commands. If the property is set to on, the .Xr net 8 command is invoked to create a .Sy USERSHARE . .Pp Because SMB shares requires a resource name, a unique resource name is constructed from the dataset name. The constructed name is a copy of the dataset name except that the characters in the dataset name, which would be invalid in the resource name, are replaced with underscore (_) characters. Linux does not currently support additional options which might be available on Solaris. .Pp If the .Sy sharesmb property is set to .Sy off , the file systems are unshared. .Pp The share is created with the ACL (Access Control List) "Everyone:F" ("F" stands for "full permissions", i.e. read and write permissions) and no guest access (which means Samba must be able to authenticate a real user \(em .Xr passwd 5 Ns / Ns Xr shadow 5 Ns - , LDAP- or .Xr smbpasswd 5 Ns -based ) by default. This means that any additional access control (disallow specific user specific access etc) must be done on the underlying file system. .It Sy sharenfs Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Ar opts Controls whether the file system is shared via NFS, and what options are to be used. A file system with a .Sy sharenfs property of .Sy off is managed with the .Xr exportfs 8 command and entries in the .Pa /etc/exports file. Otherwise, the file system is automatically shared and unshared with the .Nm zfs Cm share and .Nm zfs Cm unshare commands. If the property is set to .Sy on , the dataset is shared using the default options: .Dl sec=sys,rw,crossmnt,no_subtree_check .Pp Please note that the options are comma-separated, unlike those found in .Xr exports 5 . This is done to negate the need for quoting, as well as to make parsing with scripts easier. .Pp See .Xr exports 5 for the meaning of the default options. Otherwise, the .Xr exportfs 8 command is invoked with options equivalent to the contents of this property. .Pp When the .Sy sharenfs property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously .Sy off , or if they were shared before the property was changed. If the new property is .Sy off , the file systems are unshared. .It Sy logbias Ns = Ns Sy latency Ns | Ns Sy throughput Provide a hint to ZFS about handling of synchronous requests in this dataset. If .Sy logbias is set to .Sy latency .Pq the default , ZFS will use pool log devices .Pq if configured to handle the requests at low latency. If .Sy logbias is set to .Sy throughput , ZFS will not use configured pool log devices. ZFS will instead optimize synchronous operations for global pool throughput and efficient use of resources. .It Sy snapdev Ns = Ns Sy hidden Ns | Ns Sy visible Controls whether the volume snapshot devices under .Pa /dev/zvol/ Ns Aq Ar pool are hidden or visible. The default value is .Sy hidden . .It Sy snapdir Ns = Ns Sy hidden Ns | Ns Sy visible Controls whether the .Pa .zfs directory is hidden or visible in the root of the file system as discussed in the .Sx Snapshots section of .Xr zfsconcepts 7 . The default value is .Sy hidden . .It Sy sync Ns = Ns Sy standard Ns | Ns Sy always Ns | Ns Sy disabled Controls the behavior of synchronous requests .Pq e.g. fsync, O_DSYNC . .Sy standard is the POSIX-specified behavior of ensuring all synchronous requests are written to stable storage and all devices are flushed to ensure data is not cached by device controllers .Pq this is the default . .Sy always causes every file system transaction to be written and flushed before its system call returns. This has a large performance penalty. .Sy disabled disables synchronous requests. File system transactions are only committed to stable storage periodically. This option will give the highest performance. However, it is very dangerous as ZFS would be ignoring the synchronous transaction demands of applications such as databases or NFS. Administrators should only use this option when the risks are understood. .It Sy version Ns = Ns Ar N Ns | Ns Sy current The on-disk version of this file system, which is independent of the pool version. This property can only be set to later supported versions. See the .Nm zfs Cm upgrade command. .It Sy volsize Ns = Ns Ar size For volumes, specifies the logical size of the volume. By default, creating a volume establishes a reservation of equal size. For storage pools with a version number of 9 or higher, a .Sy refreservation is set instead. Any changes to .Sy volsize are reflected in an equivalent change to the reservation .Pq or Sy refreservation . The .Sy volsize can only be set to a multiple of .Sy volblocksize , and cannot be zero. .Pp The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when the volume size is changed while it is in use .Pq particularly when shrinking the size . Extreme care should be used when adjusting the volume size. .Pp Though not recommended, a .Qq sparse volume .Po also known as .Qq thin provisioned .Pc can be created by specifying the .Fl s option to the .Nm zfs Cm create Fl V command, or by changing the value of the .Sy refreservation property .Po or .Sy reservation property on pool version 8 or earlier .Pc after the volume has been created. A .Qq sparse volume is a volume where the value of .Sy refreservation is less than the size of the volume plus the space required to store its metadata. Consequently, writes to a sparse volume can fail with .Er ENOSPC when the pool is low on space. For a sparse volume, changes to .Sy volsize are not reflected in the .Sy refreservation . A volume that is not sparse is said to be .Qq thick provisioned . A sparse volume can become thick provisioned by setting .Sy refreservation to .Sy auto . .It Sy volmode Ns = Ns Sy default Ns | Ns Sy full Ns | Ns Sy geom Ns | Ns Sy dev Ns | Ns Sy none This property specifies how volumes should be exposed to the OS. Setting it to .Sy full exposes volumes as fully fledged block devices, providing maximal functionality. The value .Sy geom is just an alias for .Sy full and is kept for compatibility. Setting it to .Sy dev hides its partitions. Volumes with property set to .Sy none are not exposed outside ZFS, but can be snapshotted, cloned, replicated, etc, that can be suitable for backup purposes. Value .Sy default means that volumes exposition is controlled by system-wide tunable .Sy zvol_volmode , where .Sy full , .Sy dev and .Sy none are encoded as 1, 2 and 3 respectively. The default value is .Sy full . .It Sy vscan Ns = Ns Sy on Ns | Ns Sy off Controls whether regular files should be scanned for viruses when a file is opened and closed. In addition to enabling this property, the virus scan service must also be enabled for virus scanning to occur. The default value is .Sy off . This property is not used by OpenZFS. .It Sy xattr Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy sa Controls whether extended attributes are enabled for this file system. Two styles of extended attributes are supported: either directory-based or system-attribute-based. .Pp The default value of .Sy on enables directory-based extended attributes. This style of extended attribute imposes no practical limit on either the size or number of attributes which can be set on a file. Although under Linux the .Xr getxattr 2 and .Xr setxattr 2 system calls limit the maximum size to .Sy 64K . This is the most compatible style of extended attribute and is supported by all ZFS implementations. .Pp System-attribute-based xattrs can be enabled by setting the value to .Sy sa . The key advantage of this type of xattr is improved performance. Storing extended attributes as system attributes significantly decreases the amount of disk I/O required. Up to .Sy 64K of data may be stored per-file in the space reserved for system attributes. If there is not enough space available for an extended attribute then it will be automatically written as a directory-based xattr. System-attribute-based extended attributes are not accessible on platforms which do not support the .Sy xattr Ns = Ns Sy sa feature. OpenZFS supports .Sy xattr Ns = Ns Sy sa on both .Fx and Linux. .Pp The use of system-attribute-based xattrs is strongly encouraged for users of SELinux or POSIX ACLs. Both of these features heavily rely on extended attributes and benefit significantly from the reduced access time. .Pp The values .Sy on and .Sy off are equivalent to the .Sy xattr and .Sy noxattr mount options. .It Sy jailed Ns = Ns Sy off Ns | Ns Sy on Controls whether the dataset is managed from a jail. See .Xr zfs-jail 8 for more information. Jails are a .Fx feature and are not relevant on other platforms. The default value is .Sy off . .It Sy zoned Ns = Ns Sy on Ns | Ns Sy off Controls whether the dataset is managed from a non-global zone or namespace. The default value is .Sy off . .El .Pp The following three properties cannot be changed after the file system is created, and therefore, should be set when the file system is created. If the properties are not set with the .Nm zfs Cm create or .Nm zpool Cm create commands, these properties are inherited from the parent dataset. If the parent dataset lacks these properties due to having been created prior to these features being supported, the new file system will have the default values for these properties. .Bl -tag -width "" .It Xo .Sy casesensitivity Ns = Ns Sy sensitive Ns | Ns .Sy insensitive Ns | Ns Sy mixed .Xc Indicates whether the file name matching algorithm used by the file system should be case-sensitive, case-insensitive, or allow a combination of both styles of matching. The default value for the .Sy casesensitivity property is .Sy sensitive . Traditionally, .Ux and POSIX file systems have case-sensitive file names. .Pp The .Sy mixed value for the .Sy casesensitivity property indicates that the file system can support requests for both case-sensitive and case-insensitive matching behavior. Currently, case-insensitive matching behavior on a file system that supports mixed behavior is limited to the SMB server product. For more information about the .Sy mixed value behavior, see the "ZFS Administration Guide". .It Xo .Sy normalization Ns = Ns Sy none Ns | Ns Sy formC Ns | Ns .Sy formD Ns | Ns Sy formKC Ns | Ns Sy formKD .Xc Indicates whether the file system should perform a .Sy unicode normalization of file names whenever two file names are compared, and which normalization algorithm should be used. File names are always stored unmodified, names are normalized as part of any comparison process. If this property is set to a legal value other than .Sy none , and the .Sy utf8only property was left unspecified, the .Sy utf8only property is automatically set to .Sy on . The default value of the .Sy normalization property is .Sy none . This property cannot be changed after the file system is created. .It Sy utf8only Ns = Ns Sy on Ns | Ns Sy off Indicates whether the file system should reject file names that include characters that are not present in the .Sy UTF-8 character code set. If this property is explicitly set to .Sy off , the normalization property must either not be explicitly set or be set to .Sy none . The default value for the .Sy utf8only property is .Sy off . This property cannot be changed after the file system is created. .El .Pp The .Sy casesensitivity , .Sy normalization , and .Sy utf8only properties are also new permissions that can be assigned to non-privileged users by using the ZFS delegated administration feature. . .Ss Temporary Mount Point Properties When a file system is mounted, either through .Xr mount 8 for legacy mounts or the .Nm zfs Cm mount command for normal file systems, its mount options are set according to its properties. The correlation between properties and mount options is as follows: .Bl -tag -compact -offset Ds -width "rootcontext=" .It Sy atime atime/noatime .It Sy canmount auto/noauto .It Sy devices dev/nodev .It Sy exec exec/noexec .It Sy readonly ro/rw .It Sy relatime relatime/norelatime .It Sy setuid suid/nosuid .It Sy xattr xattr/noxattr .It Sy nbmand mand/nomand .It Sy context Ns = context= .It Sy fscontext Ns = fscontext= .It Sy defcontext Ns = defcontext= .It Sy rootcontext Ns = rootcontext= .El .Pp In addition, these options can be set on a per-mount basis using the .Fl o option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. The .Sy nosuid option is an alias for .Sy nodevices , Ns Sy nosetuid . These properties are reported as .Qq temporary by the .Nm zfs Cm get command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings. . .Ss User Properties In addition to the standard native properties, ZFS supports arbitrary user properties. User properties have no effect on ZFS behavior, but applications or administrators can use them to annotate datasets .Pq file systems, volumes, and snapshots . .Pp User property names must contain a colon .Pq Qq Sy \&: character to distinguish them from native properties. They may contain lowercase letters, numbers, and the following punctuation characters: colon .Pq Qq Sy \&: , dash .Pq Qq Sy - , period .Pq Qq Sy \&. , and underscore .Pq Qq Sy _ . The expected convention is that the property name is divided into two portions such as .Ar module : Ns Ar property , but this namespace is not enforced by ZFS. User property names can be at most 256 characters, and cannot begin with a dash .Pq Qq Sy - . .Pp When making programmatic use of user properties, it is strongly suggested to use a reversed DNS domain name for the .Ar module component of property names to reduce the chance that two independently-developed packages use the same property name for different purposes. .Pp The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties .Po Nm zfs Cm list , .Nm zfs Cm get , .Nm zfs Cm set , and so forth .Pc can be used to manipulate both native properties and user properties. Use the .Nm zfs Cm inherit command to clear a user property. If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 8192 bytes. diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7 index 09f1e50decda..0ff1a9a52a11 100644 --- a/sys/contrib/openzfs/man/man7/zpool-features.7 +++ b/sys/contrib/openzfs/man/man7/zpool-features.7 @@ -1,916 +1,919 @@ .\" .\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. .\" Copyright (c) 2014, Joyent, Inc. All rights reserved. .\" The contents of this file are subject to the terms of the Common Development .\" and Distribution License (the "License"). You may not use this file except .\" in compliance with the License. You can obtain a copy of the license at .\" usr/src/OPENSOLARIS.LICENSE or https://opensource.org/licenses/CDDL-1.0. .\" .\" See the License for the specific language governing permissions and .\" limitations under the License. When distributing Covered Code, include this .\" CDDL HEADER in each file and include the License file at .\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" Copyright (c) 2019, Klara Inc. .\" Copyright (c) 2019, Allan Jude .\" Copyright (c) 2021, Colm Buckley .\" .Dd June 23, 2022 .Dt ZPOOL-FEATURES 7 .Os . .Sh NAME .Nm zpool-features .Nd description of ZFS pool features . .Sh DESCRIPTION ZFS pool on-disk format versions are specified via .Dq features which replace the old on-disk format numbers .Pq the last supported on-disk format number is 28 . To enable a feature on a pool use the .Nm zpool Cm upgrade , or set the .Sy feature Ns @ Ns Ar feature-name property to .Sy enabled . Please also see the .Sx Compatibility feature sets section for information on how sets of features may be enabled together. .Pp The pool format does not affect file system version compatibility or the ability to send file systems between pools. .Pp Since most features can be enabled independently of each other, the on-disk format of the pool is specified by the set of all features marked as .Sy active on the pool. If the pool was created by another software version this set may include unsupported features. . .Ss Identifying features Every feature has a GUID of the form .Ar com.example : Ns Ar feature-name . The reversed DNS name ensures that the feature's GUID is unique across all ZFS implementations. When unsupported features are encountered on a pool they will be identified by their GUIDs. Refer to the documentation for the ZFS implementation that created the pool for information about those features. .Pp Each supported feature also has a short name. By convention a feature's short name is the portion of its GUID which follows the .Sq \&: .Po i.e. .Ar com.example : Ns Ar feature-name would have the short name .Ar feature-name .Pc , however a feature's short name may differ across ZFS implementations if following the convention would result in name conflicts. . .Ss Feature states Features can be in one of three states: .Bl -tag -width "disabled" .It Sy active This feature's on-disk format changes are in effect on the pool. Support for this feature is required to import the pool in read-write mode. If this feature is not read-only compatible, support is also required to import the pool in read-only mode .Pq see Sx Read-only compatibility . .It Sy enabled An administrator has marked this feature as enabled on the pool, but the feature's on-disk format changes have not been made yet. The pool can still be imported by software that does not support this feature, but changes may be made to the on-disk format at any time which will move the feature to the .Sy active state. Some features may support returning to the .Sy enabled state after becoming .Sy active . See feature-specific documentation for details. .It Sy disabled This feature's on-disk format changes have not been made and will not be made unless an administrator moves the feature to the .Sy enabled state. Features cannot be disabled once they have been enabled. .El .Pp The state of supported features is exposed through pool properties of the form .Sy feature Ns @ Ns Ar short-name . . .Ss Read-only compatibility Some features may make on-disk format changes that do not interfere with other software's ability to read from the pool. These features are referred to as .Dq read-only compatible . If all unsupported features on a pool are read-only compatible, the pool can be imported in read-only mode by setting the .Sy readonly property during import .Po see .Xr zpool-import 8 for details on importing pools .Pc . . .Ss Unsupported features For each unsupported feature enabled on an imported pool, a pool property named .Sy unsupported Ns @ Ns Ar feature-name will indicate why the import was allowed despite the unsupported feature. Possible values for this property are: .Bl -tag -width "readonly" .It Sy inactive The feature is in the .Sy enabled state and therefore the pool's on-disk format is still compatible with software that does not support this feature. .It Sy readonly The feature is read-only compatible and the pool has been imported in read-only mode. .El . .Ss Feature dependencies Some features depend on other features being enabled in order to function. Enabling a feature will automatically enable any features it depends on. . .Ss Compatibility feature sets It is sometimes necessary for a pool to maintain compatibility with a specific on-disk format, by enabling and disabling particular features. The .Sy compatibility feature facilitates this by allowing feature sets to be read from text files. When set to .Sy off .Pq the default , compatibility feature sets are disabled .Pq i.e. all features are enabled ; when set to .Sy legacy , no features are enabled. When set to a comma-separated list of filenames .Po each filename may either be an absolute path, or relative to .Pa /etc/zfs/compatibility.d or .Pa /usr/share/zfs/compatibility.d .Pc , the lists of requested features are read from those files, separated by whitespace and/or commas. Only features present in all files are enabled. .Pp Simple sanity checks are applied to the files: they must be between 1 B and 16 KiB in size, and must end with a newline character. .Pp The requested features are applied when a pool is created using .Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar … and controls which features are enabled when using .Nm zpool Cm upgrade . .Nm zpool Cm status will not show a warning about disabled features which are not part of the requested feature set. .Pp The special value .Sy legacy prevents any features from being enabled, either via .Nm zpool Cm upgrade or .Nm zpool Cm set Sy feature Ns @ Ns Ar feature-name Ns = Ns Sy enabled . This setting also prevents pools from being upgraded to newer on-disk versions. This is a safety measure to prevent new features from being accidentally enabled, breaking compatibility. .Pp By convention, compatibility files in .Pa /usr/share/zfs/compatibility.d are provided by the distribution, and include feature sets supported by important versions of popular distributions, and feature sets commonly supported at the start of each year. Compatibility files in .Pa /etc/zfs/compatibility.d , if present, will take precedence over files with the same name in .Pa /usr/share/zfs/compatibility.d . .Pp If an unrecognized feature is found in these files, an error message will be shown. If the unrecognized feature is in a file in .Pa /etc/zfs/compatibility.d , this is treated as an error and processing will stop. If the unrecognized feature is under .Pa /usr/share/zfs/compatibility.d , this is treated as a warning and processing will continue. This difference is to allow distributions to include features which might not be recognized by the currently-installed binaries. .Pp Compatibility files may include comments: any text from .Sq # to the end of the line is ignored. .Pp .Sy Example : .Bd -literal -compact -offset 4n .No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2 # Features which are supported by GRUB2 async_destroy bookmarks embedded_data empty_bpobj enabled_txg extensible_dataset filesystem_limits hole_birth large_blocks lz4_compress spacemap_histogram .No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev .Ed .Pp See .Xr zpool-create 8 and .Xr zpool-upgrade 8 for more information on how these commands are affected by feature sets. . .de feature .It Sy \\$2 .Bl -tag -compact -width "READ-ONLY COMPATIBLE" .It GUID .Sy \\$1:\\$2 .if !"\\$4"" \{\ .It DEPENDENCIES \fB\\$4\fP\c .if !"\\$5"" , \fB\\$5\fP\c .if !"\\$6"" , \fB\\$6\fP\c .if !"\\$7"" , \fB\\$7\fP\c .if !"\\$8"" , \fB\\$8\fP\c .if !"\\$9"" , \fB\\$9\fP\c .\} .It READ-ONLY COMPATIBLE \\$3 .El .Pp .. . .ds instant-never \ .No This feature becomes Sy active No as soon as it is enabled \ and will never return to being Sy enabled . . .ds remount-upgrade \ .No Each filesystem will be upgraded automatically when remounted, \ or when a new file is created under that filesystem. \ The upgrade can also be triggered on filesystems via \ Nm zfs Cm set Sy version Ns = Ns Sy current Ar fs . \ No The upgrade process runs in the background and may take a while to complete \ for filesystems containing large amounts of files. . .de checksum-spiel When the .Sy \\$1 feature is set to .Sy enabled , the administrator can turn on the .Sy \\$1 checksum on any dataset using .Nm zfs Cm set Sy checksum Ns = Ns Sy \\$1 Ar dset .Po see Xr zfs-set 8 Pc . This feature becomes .Sy active once a .Sy checksum property has been set to .Sy \\$1 , and will return to being .Sy enabled once all filesystems that have ever had their checksum set to .Sy \\$1 are destroyed. .. . .Sh FEATURES The following features are supported on this system: .Bl -tag -width Ds .feature org.zfsonlinux allocation_classes yes This feature enables support for separate allocation classes. .Pp This feature becomes .Sy active when a dedicated allocation class vdev .Pq dedup or special is created with the .Nm zpool Cm create No or Nm zpool Cm add No commands . With device removal, it can be returned to the .Sy enabled state if all the dedicated allocation class vdevs are removed. . .feature com.delphix async_destroy yes Destroying a file system requires traversing all of its data in order to return its used space to the pool. Without .Sy async_destroy , the file system is not fully removed until all space has been reclaimed. If the destroy operation is interrupted by a reboot or power outage, the next attempt to open the pool will need to complete the destroy operation synchronously. .Pp When .Sy async_destroy is enabled, the file system's data will be reclaimed by a background process, allowing the destroy operation to complete without traversing the entire file system. The background process is able to resume interrupted destroys after the pool has been opened, eliminating the need to finish interrupted destroys as part of the open operation. The amount of space remaining to be reclaimed by the background process is available through the .Sy freeing property. .Pp This feature is only .Sy active while .Sy freeing is non-zero. . .feature org.openzfs blake3 no extensible_dataset This feature enables the use of the BLAKE3 hash algorithm for checksum and dedup. BLAKE3 is a secure hash algorithm focused on high performance. .Pp .checksum-spiel blake3 . .feature com.delphix bookmarks yes extensible_dataset This feature enables use of the .Nm zfs Cm bookmark command. .Pp This feature is .Sy active while any bookmarks exist in the pool. All bookmarks in the pool can be listed by running .Nm zfs Cm list Fl t Sy bookmark Fl r Ar poolname . . .feature com.datto bookmark_v2 no bookmark extensible_dataset This feature enables the creation and management of larger bookmarks which are needed for other features in ZFS. .Pp This feature becomes .Sy active when a v2 bookmark is created and will be returned to the .Sy enabled state when all v2 bookmarks are destroyed. . .feature com.delphix bookmark_written no bookmark extensible_dataset bookmark_v2 This feature enables additional bookmark accounting fields, enabling the .Sy written Ns # Ns Ar bookmark property .Pq space written since a bookmark and estimates of send stream sizes for incrementals from bookmarks. .Pp This feature becomes .Sy active when a bookmark is created and will be returned to the .Sy enabled state when all bookmarks with these fields are destroyed. . .feature org.openzfs device_rebuild yes This feature enables the ability for the .Nm zpool Cm attach and .Nm zpool Cm replace commands to perform sequential reconstruction .Pq instead of healing reconstruction when resilvering. .Pp Sequential reconstruction resilvers a device in LBA order without immediately verifying the checksums. Once complete, a scrub is started, which then verifies the checksums. This approach allows full redundancy to be restored to the pool in the minimum amount of time. This two-phase approach will take longer than a healing resilver when the time to verify the checksums is included. However, unless there is additional pool damage, no checksum errors should be reported by the scrub. This feature is incompatible with raidz configurations. . This feature becomes .Sy active while a sequential resilver is in progress, and returns to .Sy enabled when the resilver completes. . .feature com.delphix device_removal no This feature enables the .Nm zpool Cm remove command to remove top-level vdevs, evacuating them to reduce the total size of the pool. .Pp This feature becomes .Sy active when the .Nm zpool Cm remove command is used on a top-level vdev, and will never return to being .Sy enabled . . .feature org.openzfs draid no This feature enables use of the .Sy draid vdev type. dRAID is a variant of RAID-Z which provides integrated distributed hot spares that allow faster resilvering while retaining the benefits of RAID-Z. Data, parity, and spare space are organized in redundancy groups and distributed evenly over all of the devices. .Pp This feature becomes .Sy active when creating a pool which uses the .Sy draid vdev type, or when adding a new .Sy draid vdev to an existing pool. . .feature org.illumos edonr no extensible_dataset This feature enables the use of the Edon-R hash algorithm for checksum, including for nopwrite .Po if compression is also enabled, an overwrite of a block whose checksum matches the data being written will be ignored .Pc . In an abundance of caution, Edon-R requires verification when used with dedup: .Nm zfs Cm set Sy dedup Ns = Ns Sy edonr , Ns Sy verify .Po see Xr zfs-set 8 Pc . .Pp Edon-R is a very high-performance hash algorithm that was part of the NIST SHA-3 competition. It provides extremely high hash performance .Pq over 350% faster than SHA-256 , but was not selected because of its unsuitability as a general purpose secure hash algorithm. This implementation utilizes the new salted checksumming functionality in ZFS, which means that the checksum is pre-seeded with a secret 256-bit random key .Pq stored on the pool before being fed the data block to be checksummed. Thus the produced checksums are unique to a given pool, preventing hash collision attacks on systems with dedup. .Pp .checksum-spiel edonr . .feature com.delphix embedded_data no This feature improves the performance and compression ratio of highly-compressible blocks. Blocks whose contents can compress to 112 bytes or smaller can take advantage of this feature. .Pp When this feature is enabled, the contents of highly-compressible blocks are stored in the block .Dq pointer itself .Po a misnomer in this case, as it contains the compressed data, rather than a pointer to its location on disk .Pc . Thus the space of the block .Pq one sector, typically 512 B or 4 KiB is saved, and no additional I/O is needed to read and write the data block. . \*[instant-never] . .feature com.delphix empty_bpobj yes This feature increases the performance of creating and using a large number of snapshots of a single filesystem or volume, and also reduces the disk space required. .Pp When there are many snapshots, each snapshot uses many Block Pointer Objects .Pq bpobjs to track blocks associated with that snapshot. However, in common use cases, most of these bpobjs are empty. This feature allows us to create each bpobj on-demand, thus eliminating the empty bpobjs. .Pp This feature is .Sy active while there are any filesystems, volumes, or snapshots which were created after enabling this feature. . .feature com.delphix enabled_txg yes Once this feature is enabled, ZFS records the transaction group number in which new features are enabled. This has no user-visible impact, but other features may depend on this feature. .Pp This feature becomes .Sy active as soon as it is enabled and will never return to being .Sy enabled . . .feature com.datto encryption no bookmark_v2 extensible_dataset This feature enables the creation and management of natively encrypted datasets. .Pp This feature becomes .Sy active when an encrypted dataset is created and will be returned to the .Sy enabled state when all datasets that use this feature are destroyed. . .feature com.delphix extensible_dataset no This feature allows more flexible use of internal ZFS data structures, and exists for other features to depend on. .Pp This feature will be .Sy active when the first dependent feature uses it, and will be returned to the .Sy enabled state when all datasets that use this feature are destroyed. . .feature com.joyent filesystem_limits yes extensible_dataset This feature enables filesystem and snapshot limits. These limits can be used to control how many filesystems and/or snapshots can be created at the point in the tree on which the limits are set. .Pp This feature is .Sy active once either of the limit properties has been set on a dataset and will never return to being .Sy enabled . . .feature com.delphix head_errlog no This feature enables the upgraded version of errlog, which required an on-disk error log format change. Now the error log of each head dataset is stored separately in the zap object and keyed by the head id. +In case of encrypted filesystems with unloaded keys or unmounted encrypted +filesystems we are unable to check their snapshots or clones for errors and +these will not be reported. With this feature enabled, every dataset affected by an error block is listed in the output of .Nm zpool Cm status . .Pp \*[instant-never] . .feature com.delphix hole_birth no enabled_txg This feature has/had bugs, the result of which is that, if you do a .Nm zfs Cm send Fl i .Pq or Fl R , No since it uses Fl i from an affected dataset, the receiving party will not see any checksum or other errors, but the resulting destination snapshot will not match the source. Its use by .Nm zfs Cm send Fl i has been disabled by default .Po see .Sy send_holes_without_birth_time in .Xr zfs 4 .Pc . .Pp This feature improves performance of incremental sends .Pq Nm zfs Cm send Fl i and receives for objects with many holes. The most common case of hole-filled objects is zvols. .Pp An incremental send stream from snapshot .Sy A No to snapshot Sy B contains information about every block that changed between .Sy A No and Sy B . Blocks which did not change between those snapshots can be identified and omitted from the stream using a piece of metadata called the .Dq block birth time , but birth times are not recorded for holes .Pq blocks filled only with zeroes . Since holes created after .Sy A No cannot be distinguished from holes created before Sy A , information about every hole in the entire filesystem or zvol is included in the send stream. .Pp For workloads where holes are rare this is not a problem. However, when incrementally replicating filesystems or zvols with many holes .Pq for example a zvol formatted with another filesystem a lot of time will be spent sending and receiving unnecessary information about holes that already exist on the receiving side. .Pp Once the .Sy hole_birth feature has been enabled the block birth times of all new holes will be recorded. Incremental sends between snapshots created after this feature is enabled will use this new metadata to avoid sending information about holes that already exist on the receiving side. .Pp \*[instant-never] . .feature org.open-zfs large_blocks no extensible_dataset This feature allows the record size on a dataset to be set larger than 128 KiB. .Pp This feature becomes .Sy active once a dataset contains a file with a block size larger than 128 KiB, and will return to being .Sy enabled once all filesystems that have ever had their recordsize larger than 128 KiB are destroyed. . .feature org.zfsonlinux large_dnode no extensible_dataset This feature allows the size of dnodes in a dataset to be set larger than 512 B. . This feature becomes .Sy active once a dataset contains an object with a dnode larger than 512 B, which occurs as a result of setting the .Sy dnodesize dataset property to a value other than .Sy legacy . The feature will return to being .Sy enabled once all filesystems that have ever contained a dnode larger than 512 B are destroyed. Large dnodes allow more data to be stored in the bonus buffer, thus potentially improving performance by avoiding the use of spill blocks. . .feature com.delphix livelist yes This feature allows clones to be deleted faster than the traditional method when a large number of random/sparse writes have been made to the clone. All blocks allocated and freed after a clone is created are tracked by the the clone's livelist which is referenced during the deletion of the clone. The feature is activated when a clone is created and remains .Sy active until all clones have been destroyed. . .feature com.delphix log_spacemap yes com.delphix:spacemap_v2 This feature improves performance for heavily-fragmented pools, especially when workloads are heavy in random-writes. It does so by logging all the metaslab changes on a single spacemap every TXG instead of scattering multiple writes to all the metaslab spacemaps. .Pp \*[instant-never] . .feature org.illumos lz4_compress no .Sy lz4 is a high-performance real-time compression algorithm that features significantly faster compression and decompression as well as a higher compression ratio than the older .Sy lzjb compression. Typically, .Sy lz4 compression is approximately 50% faster on compressible data and 200% faster on incompressible data than .Sy lzjb . It is also approximately 80% faster on decompression, while giving approximately a 10% better compression ratio. .Pp When the .Sy lz4_compress feature is set to .Sy enabled , the administrator can turn on .Sy lz4 compression on any dataset on the pool using the .Xr zfs-set 8 command. All newly written metadata will be compressed with the .Sy lz4 algorithm. .Pp \*[instant-never] . .feature com.joyent multi_vdev_crash_dump no This feature allows a dump device to be configured with a pool comprised of multiple vdevs. Those vdevs may be arranged in any mirrored or raidz configuration. .Pp When the .Sy multi_vdev_crash_dump feature is set to .Sy enabled , the administrator can use .Xr dumpadm 8 to configure a dump device on a pool comprised of multiple vdevs. .Pp Under .Fx and Linux this feature is unused, but registered for compatibility. New pools created on these systems will have the feature .Sy enabled but will never transition to .Sy active , as this functionality is not required for crash dump support. Existing pools where this feature is .Sy active can be imported. . .feature com.delphix obsolete_counts yes device_removal This feature is an enhancement of .Sy device_removal , which will over time reduce the memory used to track removed devices. When indirect blocks are freed or remapped, we note that their part of the indirect mapping is .Dq obsolete – no longer needed. .Pp This feature becomes .Sy active when the .Nm zpool Cm remove command is used on a top-level vdev, and will never return to being .Sy enabled . . .feature org.zfsonlinux project_quota yes extensible_dataset This feature allows administrators to account the spaces and objects usage information against the project identifier .Pq ID . .Pp The project ID is an object-based attribute. When upgrading an existing filesystem, objects without a project ID will be assigned a zero project ID. When this feature is enabled, newly created objects inherit their parent directories' project ID if the parent's inherit flag is set .Pq via Nm chattr Sy [+-]P No or Nm zfs Cm project Fl s Ns | Ns Fl C . Otherwise, the new object's project ID will be zero. An object's project ID can be changed at any time by the owner .Pq or privileged user via .Nm chattr Fl p Ar prjid or .Nm zfs Cm project Fl p Ar prjid . .Pp This feature will become .Sy active as soon as it is enabled and will never return to being .Sy disabled . \*[remount-upgrade] . .feature com.delphix redaction_bookmarks no bookmarks extensible_dataset This feature enables the use of redacted .Nm zfs Cm send Ns s , which create redaction bookmarks storing the list of blocks redacted by the send that created them. For more information about redacted sends, see .Xr zfs-send 8 . . .feature com.delphix redacted_datasets no extensible_dataset This feature enables the receiving of redacted .Nm zfs Cm send streams, which create redacted datasets when received. These datasets are missing some of their blocks, and so cannot be safely mounted, and their contents cannot be safely read. For more information about redacted receives, see .Xr zfs-send 8 . . .feature com.datto resilver_defer yes This feature allows ZFS to postpone new resilvers if an existing one is already in progress. Without this feature, any new resilvers will cause the currently running one to be immediately restarted from the beginning. .Pp This feature becomes .Sy active once a resilver has been deferred, and returns to being .Sy enabled when the deferred resilver begins. . .feature org.illumos sha512 no extensible_dataset This feature enables the use of the SHA-512/256 truncated hash algorithm .Pq FIPS 180-4 for checksum and dedup. The native 64-bit arithmetic of SHA-512 provides an approximate 50% performance boost over SHA-256 on 64-bit hardware and is thus a good minimum-change replacement candidate for systems where hash performance is important, but these systems cannot for whatever reason utilize the faster .Sy skein No and Sy edonr algorithms. .Pp .checksum-spiel sha512 . .feature org.illumos skein no extensible_dataset This feature enables the use of the Skein hash algorithm for checksum and dedup. Skein is a high-performance secure hash algorithm that was a finalist in the NIST SHA-3 competition. It provides a very high security margin and high performance on 64-bit hardware .Pq 80% faster than SHA-256 . This implementation also utilizes the new salted checksumming functionality in ZFS, which means that the checksum is pre-seeded with a secret 256-bit random key .Pq stored on the pool before being fed the data block to be checksummed. Thus the produced checksums are unique to a given pool, preventing hash collision attacks on systems with dedup. .Pp .checksum-spiel skein . .feature com.delphix spacemap_histogram yes This features allows ZFS to maintain more information about how free space is organized within the pool. If this feature is .Sy enabled , it will be activated when a new space map object is created, or an existing space map is upgraded to the new format, and never returns back to being .Sy enabled . . .feature com.delphix spacemap_v2 yes This feature enables the use of the new space map encoding which consists of two words .Pq instead of one whenever it is advantageous. The new encoding allows space maps to represent large regions of space more efficiently on-disk while also increasing their maximum addressable offset. .Pp This feature becomes .Sy active once it is .Sy enabled , and never returns back to being .Sy enabled . . .feature org.zfsonlinux userobj_accounting yes extensible_dataset This feature allows administrators to account the object usage information by user and group. .Pp \*[instant-never] \*[remount-upgrade] . .feature org.openzfs zilsaxattr yes extensible_dataset This feature enables .Sy xattr Ns = Ns Sy sa extended attribute logging in the ZIL. If enabled, extended attribute changes .Pq both Sy xattrdir Ns = Ns Sy dir No and Sy xattr Ns = Ns Sy sa are guaranteed to be durable if either the dataset had .Sy sync Ns = Ns Sy always set at the time the changes were made, or .Xr sync 2 is called on the dataset after the changes were made. .Pp This feature becomes .Sy active when a ZIL is created for at least one dataset and will be returned to the .Sy enabled state when it is destroyed for all datasets that use this feature. . .feature com.delphix zpool_checkpoint yes This feature enables the .Nm zpool Cm checkpoint command that can checkpoint the state of the pool at the time it was issued and later rewind back to it or discard it. .Pp This feature becomes .Sy active when the .Nm zpool Cm checkpoint command is used to checkpoint the pool. The feature will only return back to being .Sy enabled when the pool is rewound or the checkpoint has been discarded. . .feature org.freebsd zstd_compress no extensible_dataset .Sy zstd is a high-performance compression algorithm that features a combination of high compression ratios and high speed. Compared to .Sy gzip , .Sy zstd offers slightly better compression at much higher speeds. Compared to .Sy lz4 , .Sy zstd offers much better compression while being only modestly slower. Typically, .Sy zstd compression speed ranges from 250 to 500 MB/s per thread and decompression speed is over 1 GB/s per thread. .Pp When the .Sy zstd feature is set to .Sy enabled , the administrator can turn on .Sy zstd compression of any dataset using .Nm zfs Cm set Sy compress Ns = Ns Sy zstd Ar dset .Po see Xr zfs-set 8 Pc . This feature becomes .Sy active once a .Sy compress property has been set to .Sy zstd , and will return to being .Sy enabled once all filesystems that have ever had their .Sy compress property set to .Sy zstd are destroyed. .El . .Sh SEE ALSO .Xr zfs 8 , .Xr zpool 8 diff --git a/sys/contrib/openzfs/man/man8/zed.8.in b/sys/contrib/openzfs/man/man8/zed.8.in index 6c51f10695cc..00f23edec375 100644 --- a/sys/contrib/openzfs/man/man8/zed.8.in +++ b/sys/contrib/openzfs/man/man8/zed.8.in @@ -1,255 +1,267 @@ .\" .\" This file is part of the ZFS Event Daemon (ZED). .\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). .\" Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. .\" Refer to the OpenZFS git commit log for authoritative copyright attribution. .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License Version 1.0 (CDDL-1.0). .\" You can obtain a copy of the license from the top-level file .\" "OPENSOLARIS.LICENSE" or at . .\" You may not use this file except in compliance with the license. .\" .\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) .\" .Dd May 26, 2021 .Dt ZED 8 .Os . .Sh NAME .Nm ZED .Nd ZFS Event Daemon .Sh SYNOPSIS .Nm .Op Fl fFhILMvVZ .Op Fl d Ar zedletdir .Op Fl p Ar pidfile .Op Fl P Ar path .Op Fl s Ar statefile .Op Fl j Ar jobs +.Op Fl b Ar buflen . .Sh DESCRIPTION The .Nm (ZFS Event Daemon) monitors events generated by the ZFS kernel module. When a zevent (ZFS Event) is posted, the .Nm will run any ZEDLETs (ZFS Event Daemon Linkage for Executable Tasks) that have been enabled for the corresponding zevent class. . .Sh OPTIONS .Bl -tag -width "-h" .It Fl h Display a summary of the command-line options. .It Fl L Display license information. .It Fl V Display version information. .It Fl v Be verbose. .It Fl f Force the daemon to run if at all possible, disabling security checks and throwing caution to the wind. Not recommended for use in production. .It Fl F Don't daemonise: remain attached to the controlling terminal, log to the standard I/O streams. .It Fl M Lock all current and future pages in the virtual memory address space. This may help the daemon remain responsive when the system is under heavy memory pressure. .It Fl I Request that the daemon idle rather than exit when the kernel modules are not loaded. Processing of events will start, or resume, when the kernel modules are (re)loaded. Under Linux the kernel modules cannot be unloaded while the daemon is running. .It Fl Z Zero the daemon's state, thereby allowing zevents still within the kernel to be reprocessed. .It Fl d Ar zedletdir Read the enabled ZEDLETs from the specified directory. .It Fl p Ar pidfile Write the daemon's process ID to the specified file. .It Fl P Ar path Custom .Ev $PATH for zedlets to use. Normally zedlets run in a locked-down environment, with hardcoded paths to the ZFS commands .Pq Ev $ZFS , $ZPOOL , $ZED , … , and a hard-coded .Ev $PATH . This is done for security reasons. However, the ZFS test suite uses a custom PATH for its ZFS commands, and passes it to .Nm with .Fl P . In short, .Fl P is only to be used by the ZFS test suite; never use it in production! .It Fl s Ar statefile Write the daemon's state to the specified file. .It Fl j Ar jobs Allow at most .Ar jobs ZEDLETs to run concurrently, delaying execution of new ones until they finish. Defaults to .Sy 16 . +.It Fl b Ar buflen +Cap kernel event buffer growth to +.Ar buflen +entries. +This buffer is grown when the daemon misses an event, but results in +unreclaimable memory use in the kernel. +A value of +.Sy 0 +removes the cap. +Defaults to +.Sy 1048576 . .El .Sh ZEVENTS A zevent is comprised of a list of nvpairs (name/value pairs). Each zevent contains an EID (Event IDentifier) that uniquely identifies it throughout the lifetime of the loaded ZFS kernel module; this EID is a monotonically increasing integer that resets to 1 each time the kernel module is loaded. Each zevent also contains a class string that identifies the type of event. For brevity, a subclass string is defined that omits the leading components of the class string. Additional nvpairs exist to provide event details. .Pp The kernel maintains a list of recent zevents that can be viewed (along with their associated lists of nvpairs) using the .Nm zpool Cm events Fl v command. . .Sh CONFIGURATION ZEDLETs to be invoked in response to zevents are located in the .Em enabled-zedlets directory .Pq Ar zedletdir . These can be symlinked or copied from the .Em installed-zedlets directory; symlinks allow for automatic updates from the installed ZEDLETs, whereas copies preserve local modifications. As a security measure, since ownership change is a privileged operation, ZEDLETs must be owned by root. They must have execute permissions for the user, but they must not have write permissions for group or other. Dotfiles are ignored. .Pp ZEDLETs are named after the zevent class for which they should be invoked. In particular, a ZEDLET will be invoked for a given zevent if either its class or subclass string is a prefix of its filename (and is followed by a non-alphabetic character). As a special case, the prefix .Sy all matches all zevents. Multiple ZEDLETs may be invoked for a given zevent. . .Sh ZEDLETS ZEDLETs are executables invoked by the ZED in response to a given zevent. They should be written under the presumption they can be invoked concurrently, and they should use appropriate locking to access any shared resources. Common variables used by ZEDLETs can be stored in the default rc file which is sourced by scripts; these variables should be prefixed with .Sy ZED_ . .Pp The zevent nvpairs are passed to ZEDLETs as environment variables. Each nvpair name is converted to an environment variable in the following manner: .Bl -enum -compact .It it is prefixed with .Sy ZEVENT_ , .It it is converted to uppercase, and .It each non-alphanumeric character is converted to an underscore. .El .Pp Some additional environment variables have been defined to present certain nvpair values in a more convenient form. An incomplete list of zevent environment variables is as follows: .Bl -tag -compact -width "ZEVENT_TIME_STRING" .It Sy ZEVENT_EID The Event IDentifier. .It Sy ZEVENT_CLASS The zevent class string. .It Sy ZEVENT_SUBCLASS The zevent subclass string. .It Sy ZEVENT_TIME The time at which the zevent was posted as .Dq Em seconds nanoseconds since the Epoch. .It Sy ZEVENT_TIME_SECS The .Em seconds component of .Sy ZEVENT_TIME . .It Sy ZEVENT_TIME_NSECS The .Em nanoseconds component of .Sy ZEVENT_TIME . .It Sy ZEVENT_TIME_STRING An almost-RFC3339-compliant string for .Sy ZEVENT_TIME . .El .Pp Additionally, the following ZED & ZFS variables are defined: .Bl -tag -compact -width "ZEVENT_TIME_STRING" .It Sy ZED_PID The daemon's process ID. .It Sy ZED_ZEDLET_DIR The daemon's current .Em enabled-zedlets directory. .It Sy ZFS_ALIAS The alias .Pq Dq Em name Ns - Ns Em version Ns - Ns Em release string of the ZFS distribution the daemon is part of. .It Sy ZFS_VERSION The ZFS version the daemon is part of. .It Sy ZFS_RELEASE The ZFS release the daemon is part of. .El .Pp ZEDLETs may need to call other ZFS commands. The installation paths of the following executables are defined as environment variables: .Sy ZDB , .Sy ZED , .Sy ZFS , .Sy ZINJECT , and .Sy ZPOOL . These variables may be overridden in the rc file. . .Sh FILES .Bl -tag -width "-c" .It Pa @sysconfdir@/zfs/zed.d The default directory for enabled ZEDLETs. .It Pa @sysconfdir@/zfs/zed.d/zed.rc The default rc file for common variables used by ZEDLETs. .It Pa @zfsexecdir@/zed.d The default directory for installed ZEDLETs. .It Pa @runstatedir@/zed.pid The default file containing the daemon's process ID. .It Pa @runstatedir@/zed.state The default file containing the daemon's state. .El . .Sh SIGNALS .Bl -tag -width "-c" .It Sy SIGHUP Reconfigure the daemon and rescan the directory for enabled ZEDLETs. .It Sy SIGTERM , SIGINT Terminate the daemon. .El . .Sh SEE ALSO .Xr zfs 8 , .Xr zpool 8 , .Xr zpool-events 8 . .Sh NOTES The .Nm requires root privileges. .Pp Do not taunt the .Nm . . .Sh BUGS ZEDLETs are unable to return state/status information to the kernel. .Pp Internationalization support via gettext has not been added. diff --git a/sys/contrib/openzfs/module/zcommon/zfs_prop.c b/sys/contrib/openzfs/module/zcommon/zfs_prop.c index 2a58088a78e6..0e91304ecd4b 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_prop.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_prop.c @@ -1,1070 +1,1070 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright 2016, Joyent, Inc. * Copyright (c) 2019, Klara Inc. * Copyright (c) 2019, Allan Jude */ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include "zfs_prop.h" #include "zfs_deleg.h" #include "zfs_fletcher.h" #if !defined(_KERNEL) #include #include #include #endif static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS]; /* Note this is indexed by zfs_userquota_prop_t, keep the order the same */ const char *const zfs_userquota_prop_prefixes[] = { "userused@", "userquota@", "groupused@", "groupquota@", "userobjused@", "userobjquota@", "groupobjused@", "groupobjquota@", "projectused@", "projectquota@", "projectobjused@", "projectobjquota@" }; zprop_desc_t * zfs_prop_get_table(void) { return (zfs_prop_table); } void zfs_prop_init(void) { static const zprop_index_t checksum_table[] = { { "on", ZIO_CHECKSUM_ON }, { "off", ZIO_CHECKSUM_OFF }, { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, { "sha256", ZIO_CHECKSUM_SHA256 }, { "noparity", ZIO_CHECKSUM_NOPARITY }, { "sha512", ZIO_CHECKSUM_SHA512 }, { "skein", ZIO_CHECKSUM_SKEIN }, { "edonr", ZIO_CHECKSUM_EDONR }, { "blake3", ZIO_CHECKSUM_BLAKE3 }, { NULL } }; static const zprop_index_t dedup_table[] = { { "on", ZIO_CHECKSUM_ON }, { "off", ZIO_CHECKSUM_OFF }, { "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY }, { "sha256", ZIO_CHECKSUM_SHA256 }, { "sha256,verify", ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY }, { "sha512", ZIO_CHECKSUM_SHA512 }, { "sha512,verify", ZIO_CHECKSUM_SHA512 | ZIO_CHECKSUM_VERIFY }, { "skein", ZIO_CHECKSUM_SKEIN }, { "skein,verify", ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY }, { "edonr,verify", ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY }, { "blake3", ZIO_CHECKSUM_BLAKE3 }, { "blake3,verify", ZIO_CHECKSUM_BLAKE3 | ZIO_CHECKSUM_VERIFY }, { NULL } }; static const zprop_index_t compress_table[] = { { "on", ZIO_COMPRESS_ON }, { "off", ZIO_COMPRESS_OFF }, { "lzjb", ZIO_COMPRESS_LZJB }, { "gzip", ZIO_COMPRESS_GZIP_6 }, /* gzip default */ { "gzip-1", ZIO_COMPRESS_GZIP_1 }, { "gzip-2", ZIO_COMPRESS_GZIP_2 }, { "gzip-3", ZIO_COMPRESS_GZIP_3 }, { "gzip-4", ZIO_COMPRESS_GZIP_4 }, { "gzip-5", ZIO_COMPRESS_GZIP_5 }, { "gzip-6", ZIO_COMPRESS_GZIP_6 }, { "gzip-7", ZIO_COMPRESS_GZIP_7 }, { "gzip-8", ZIO_COMPRESS_GZIP_8 }, { "gzip-9", ZIO_COMPRESS_GZIP_9 }, { "zle", ZIO_COMPRESS_ZLE }, { "lz4", ZIO_COMPRESS_LZ4 }, { "zstd", ZIO_COMPRESS_ZSTD }, { "zstd-fast", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_DEFAULT) }, /* * ZSTD 1-19 are synthetic. We store the compression level in a * separate hidden property to avoid wasting a large amount of * space in the ZIO_COMPRESS enum. * * The compression level is also stored within the header of the * compressed block since we may need it for later recompression * to avoid checksum errors (L2ARC). * * Note that the level here is defined as bit shifted mask on * top of the method. */ { "zstd-1", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_1) }, { "zstd-2", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_2) }, { "zstd-3", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_3) }, { "zstd-4", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_4) }, { "zstd-5", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_5) }, { "zstd-6", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_6) }, { "zstd-7", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_7) }, { "zstd-8", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_8) }, { "zstd-9", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_9) }, { "zstd-10", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_10) }, { "zstd-11", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_11) }, { "zstd-12", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_12) }, { "zstd-13", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_13) }, { "zstd-14", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_14) }, { "zstd-15", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_15) }, { "zstd-16", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_16) }, { "zstd-17", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_17) }, { "zstd-18", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_18) }, { "zstd-19", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_19) }, /* * The ZSTD-Fast levels are also synthetic. */ { "zstd-fast-1", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_1) }, { "zstd-fast-2", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_2) }, { "zstd-fast-3", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_3) }, { "zstd-fast-4", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_4) }, { "zstd-fast-5", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_5) }, { "zstd-fast-6", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_6) }, { "zstd-fast-7", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_7) }, { "zstd-fast-8", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_8) }, { "zstd-fast-9", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_9) }, { "zstd-fast-10", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_10) }, { "zstd-fast-20", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_20) }, { "zstd-fast-30", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_30) }, { "zstd-fast-40", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_40) }, { "zstd-fast-50", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_50) }, { "zstd-fast-60", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_60) }, { "zstd-fast-70", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_70) }, { "zstd-fast-80", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_80) }, { "zstd-fast-90", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_90) }, { "zstd-fast-100", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_100) }, { "zstd-fast-500", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_500) }, { "zstd-fast-1000", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_1000) }, { NULL } }; static const zprop_index_t crypto_table[] = { { "on", ZIO_CRYPT_ON }, { "off", ZIO_CRYPT_OFF }, { "aes-128-ccm", ZIO_CRYPT_AES_128_CCM }, { "aes-192-ccm", ZIO_CRYPT_AES_192_CCM }, { "aes-256-ccm", ZIO_CRYPT_AES_256_CCM }, { "aes-128-gcm", ZIO_CRYPT_AES_128_GCM }, { "aes-192-gcm", ZIO_CRYPT_AES_192_GCM }, { "aes-256-gcm", ZIO_CRYPT_AES_256_GCM }, { NULL } }; static const zprop_index_t keyformat_table[] = { { "none", ZFS_KEYFORMAT_NONE }, { "raw", ZFS_KEYFORMAT_RAW }, { "hex", ZFS_KEYFORMAT_HEX }, { "passphrase", ZFS_KEYFORMAT_PASSPHRASE }, { NULL } }; static const zprop_index_t snapdir_table[] = { { "hidden", ZFS_SNAPDIR_HIDDEN }, { "visible", ZFS_SNAPDIR_VISIBLE }, { NULL } }; static const zprop_index_t snapdev_table[] = { { "hidden", ZFS_SNAPDEV_HIDDEN }, { "visible", ZFS_SNAPDEV_VISIBLE }, { NULL } }; static const zprop_index_t acl_mode_table[] = { { "discard", ZFS_ACL_DISCARD }, { "groupmask", ZFS_ACL_GROUPMASK }, { "passthrough", ZFS_ACL_PASSTHROUGH }, { "restricted", ZFS_ACL_RESTRICTED }, { NULL } }; static const zprop_index_t acltype_table[] = { { "off", ZFS_ACLTYPE_OFF }, { "posix", ZFS_ACLTYPE_POSIX }, { "nfsv4", ZFS_ACLTYPE_NFSV4 }, { "disabled", ZFS_ACLTYPE_OFF }, /* bkwrd compatibility */ { "noacl", ZFS_ACLTYPE_OFF }, /* bkwrd compatibility */ { "posixacl", ZFS_ACLTYPE_POSIX }, /* bkwrd compatibility */ { NULL } }; static const zprop_index_t acl_inherit_table[] = { { "discard", ZFS_ACL_DISCARD }, { "noallow", ZFS_ACL_NOALLOW }, { "restricted", ZFS_ACL_RESTRICTED }, { "passthrough", ZFS_ACL_PASSTHROUGH }, { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatibility */ { "passthrough-x", ZFS_ACL_PASSTHROUGH_X }, { NULL } }; static const zprop_index_t case_table[] = { { "sensitive", ZFS_CASE_SENSITIVE }, { "insensitive", ZFS_CASE_INSENSITIVE }, { "mixed", ZFS_CASE_MIXED }, { NULL } }; static const zprop_index_t copies_table[] = { { "1", 1 }, { "2", 2 }, { "3", 3 }, { NULL } }; /* * Use the unique flags we have to send to u8_strcmp() and/or * u8_textprep() to represent the various normalization property * values. */ static const zprop_index_t normalize_table[] = { { "none", 0 }, { "formD", U8_TEXTPREP_NFD }, { "formKC", U8_TEXTPREP_NFKC }, { "formC", U8_TEXTPREP_NFC }, { "formKD", U8_TEXTPREP_NFKD }, { NULL } }; static const zprop_index_t version_table[] = { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 }, { "5", 5 }, { "current", ZPL_VERSION }, { NULL } }; static const zprop_index_t boolean_table[] = { { "off", 0 }, { "on", 1 }, { NULL } }; static const zprop_index_t keystatus_table[] = { { "none", ZFS_KEYSTATUS_NONE}, { "unavailable", ZFS_KEYSTATUS_UNAVAILABLE}, { "available", ZFS_KEYSTATUS_AVAILABLE}, { NULL } }; static const zprop_index_t logbias_table[] = { { "latency", ZFS_LOGBIAS_LATENCY }, { "throughput", ZFS_LOGBIAS_THROUGHPUT }, { NULL } }; static const zprop_index_t canmount_table[] = { { "off", ZFS_CANMOUNT_OFF }, { "on", ZFS_CANMOUNT_ON }, { "noauto", ZFS_CANMOUNT_NOAUTO }, { NULL } }; static const zprop_index_t cache_table[] = { { "none", ZFS_CACHE_NONE }, { "metadata", ZFS_CACHE_METADATA }, { "all", ZFS_CACHE_ALL }, { NULL } }; static const zprop_index_t sync_table[] = { { "standard", ZFS_SYNC_STANDARD }, { "always", ZFS_SYNC_ALWAYS }, { "disabled", ZFS_SYNC_DISABLED }, { NULL } }; static const zprop_index_t xattr_table[] = { { "off", ZFS_XATTR_OFF }, { "on", ZFS_XATTR_DIR }, { "sa", ZFS_XATTR_SA }, { "dir", ZFS_XATTR_DIR }, { NULL } }; static const zprop_index_t dnsize_table[] = { { "legacy", ZFS_DNSIZE_LEGACY }, { "auto", ZFS_DNSIZE_AUTO }, { "1k", ZFS_DNSIZE_1K }, { "2k", ZFS_DNSIZE_2K }, { "4k", ZFS_DNSIZE_4K }, { "8k", ZFS_DNSIZE_8K }, { "16k", ZFS_DNSIZE_16K }, { NULL } }; static const zprop_index_t redundant_metadata_table[] = { { "all", ZFS_REDUNDANT_METADATA_ALL }, { "most", ZFS_REDUNDANT_METADATA_MOST }, { NULL } }; static const zprop_index_t volmode_table[] = { { "default", ZFS_VOLMODE_DEFAULT }, { "full", ZFS_VOLMODE_GEOM }, { "geom", ZFS_VOLMODE_GEOM }, { "dev", ZFS_VOLMODE_DEV }, { "none", ZFS_VOLMODE_NONE }, { NULL } }; struct zfs_mod_supported_features *sfeatures = zfs_mod_list_supported(ZFS_SYSFS_DATASET_PROPERTIES); /* inherit index properties */ zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata", ZFS_REDUNDANT_METADATA_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "all | most", "REDUND_MD", redundant_metadata_table, sfeatures); zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "standard | always | disabled", "SYNC", sync_table, sfeatures); zprop_register_index(ZFS_PROP_CHECKSUM, "checksum", ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | fletcher2 | fletcher4 | sha256 | sha512 | skein" " | edonr | blake3", "CHECKSUM", checksum_table, sfeatures); zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | verify | sha256[,verify] | sha512[,verify] | " "skein[,verify] | edonr,verify | blake3[,verify]", "DEDUP", dedup_table, sfeatures); zprop_register_index(ZFS_PROP_COMPRESSION, "compression", ZIO_COMPRESS_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | lzjb | gzip | gzip-[1-9] | zle | lz4 | " "zstd | zstd-[1-19] | " "zstd-fast | zstd-fast-[1-10,20,30,40,50,60,70,80,90,100,500,1000]", "COMPRESS", compress_table, sfeatures); zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "hidden | visible", "SNAPDIR", snapdir_table, sfeatures); zprop_register_index(ZFS_PROP_SNAPDEV, "snapdev", ZFS_SNAPDEV_HIDDEN, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "hidden | visible", "SNAPDEV", snapdev_table, sfeatures); zprop_register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_DISCARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "discard | groupmask | passthrough | restricted", "ACLMODE", acl_mode_table, sfeatures); zprop_register_index(ZFS_PROP_ACLTYPE, "acltype", #ifdef __linux__ /* Linux doesn't natively support ZFS's NFSv4-style ACLs. */ ZFS_ACLTYPE_OFF, #else ZFS_ACLTYPE_NFSV4, #endif PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "off | nfsv4 | posix", "ACLTYPE", acltype_table, sfeatures); zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "discard | noallow | restricted | passthrough | passthrough-x", "ACLINHERIT", acl_inherit_table, sfeatures); zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "1 | 2 | 3", "COPIES", copies_table, sfeatures); zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache", ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "PRIMARYCACHE", cache_table, sfeatures); zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache", ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "SECONDARYCACHE", cache_table, sfeatures); zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "latency | throughput", "LOGBIAS", logbias_table, sfeatures); zprop_register_index(ZFS_PROP_XATTR, "xattr", ZFS_XATTR_DIR, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off | dir | sa", "XATTR", xattr_table, sfeatures); zprop_register_index(ZFS_PROP_DNODESIZE, "dnodesize", ZFS_DNSIZE_LEGACY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "legacy | auto | 1k | 2k | 4k | 8k | 16k", "DNSIZE", dnsize_table, sfeatures); zprop_register_index(ZFS_PROP_VOLMODE, "volmode", ZFS_VOLMODE_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "default | full | geom | dev | none", "VOLMODE", volmode_table, sfeatures); /* inherit index (boolean) properties */ zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table, sfeatures); - zprop_register_index(ZFS_PROP_RELATIME, "relatime", 0, PROP_INHERIT, + zprop_register_index(ZFS_PROP_RELATIME, "relatime", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "RELATIME", boolean_table, sfeatures); zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES", boolean_table, sfeatures); zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC", boolean_table, sfeatures); zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID", boolean_table, sfeatures); zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY", boolean_table, sfeatures); #ifdef __FreeBSD__ zprop_register_index(ZFS_PROP_ZONED, "jailed", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "JAILED", boolean_table, sfeatures); #else zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table, sfeatures); #endif zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", boolean_table, sfeatures); zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND", boolean_table, sfeatures); zprop_register_index(ZFS_PROP_OVERLAY, "overlay", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "OVERLAY", boolean_table, sfeatures); /* default index properties */ zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table, sfeatures); zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", "CANMOUNT", canmount_table, sfeatures); /* readonly index properties */ zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table, sfeatures); zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", boolean_table, sfeatures); zprop_register_index(ZFS_PROP_KEYSTATUS, "keystatus", ZFS_KEYSTATUS_NONE, PROP_READONLY, ZFS_TYPE_DATASET, "none | unavailable | available", "KEYSTATUS", keystatus_table, sfeatures); /* set once index properties */ zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "none | formC | formD | formKC | formKD", "NORMALIZATION", normalize_table, sfeatures); zprop_register_index(ZFS_PROP_CASE, "casesensitivity", ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "sensitive | insensitive | mixed", "CASE", case_table, sfeatures); zprop_register_index(ZFS_PROP_KEYFORMAT, "keyformat", ZFS_KEYFORMAT_NONE, PROP_ONETIME_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "none | raw | hex | passphrase", "KEYFORMAT", keyformat_table, sfeatures); zprop_register_index(ZFS_PROP_ENCRYPTION, "encryption", ZIO_CRYPT_DEFAULT, PROP_ONETIME, ZFS_TYPE_DATASET, "on | off | aes-128-ccm | aes-192-ccm | aes-256-ccm | " "aes-128-gcm | aes-192-gcm | aes-256-gcm", "ENCRYPTION", crypto_table, sfeatures); /* set once index (boolean) properties */ zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "UTF8ONLY", boolean_table, sfeatures); /* string properties */ zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "ORIGIN", sfeatures); zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "[,...]", "CLONES", sfeatures); zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, " | legacy | none", "MOUNTPOINT", sfeatures); zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | NFS share options", "SHARENFS", sfeatures); zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "filesystem | volume | snapshot | bookmark", "TYPE", sfeatures); zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | SMB share options", "SHARESMB", sfeatures); zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel", ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET, "", "MLSLABEL", sfeatures); zprop_register_string(ZFS_PROP_SELINUX_CONTEXT, "context", "none", PROP_DEFAULT, ZFS_TYPE_DATASET, "", "CONTEXT", sfeatures); zprop_register_string(ZFS_PROP_SELINUX_FSCONTEXT, "fscontext", "none", PROP_DEFAULT, ZFS_TYPE_DATASET, "", "FSCONTEXT", sfeatures); zprop_register_string(ZFS_PROP_SELINUX_DEFCONTEXT, "defcontext", "none", PROP_DEFAULT, ZFS_TYPE_DATASET, "", "DEFCONTEXT", sfeatures); zprop_register_string(ZFS_PROP_SELINUX_ROOTCONTEXT, "rootcontext", "none", PROP_DEFAULT, ZFS_TYPE_DATASET, "", "ROOTCONTEXT", sfeatures); zprop_register_string(ZFS_PROP_RECEIVE_RESUME_TOKEN, "receive_resume_token", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "RESUMETOK", sfeatures); zprop_register_string(ZFS_PROP_ENCRYPTION_ROOT, "encryptionroot", NULL, PROP_READONLY, ZFS_TYPE_DATASET, "", "ENCROOT", sfeatures); zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation", "none", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "prompt | | | ", "KEYLOCATION", sfeatures); zprop_register_string(ZFS_PROP_REDACT_SNAPS, "redact_snaps", NULL, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "[,...]", "RSNAPS", sfeatures); /* readonly number properties */ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "USED", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "AVAIL", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "", "REFER", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<1.00x or higher if compressed>", "RATIO", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0, PROP_READONLY, ZFS_TYPE_DATASET, "<1.00x or higher if compressed>", "REFRATIO", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDSNAP", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDDS", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDCHILD", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDREFRESERV", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "", "USERREFS", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "WRITTEN", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_LOGICALUSED, "logicalused", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "LUSED", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_LOGICALREFERENCED, "logicalreferenced", 0, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "", "LREFER", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_FILESYSTEM_COUNT, "filesystem_count", UINT64_MAX, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "", "FSCOUNT", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_SNAPSHOT_COUNT, "snapshot_count", UINT64_MAX, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "SSCOUNT", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_GUID, "guid", 0, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "", "GUID", B_TRUE, sfeatures); zprop_register_number(ZFS_PROP_CREATETXG, "createtxg", 0, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "", "CREATETXG", B_TRUE, sfeatures); zprop_register_number(ZFS_PROP_PBKDF2_ITERS, "pbkdf2iters", 0, PROP_ONETIME_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "PBKDF2ITERS", B_TRUE, sfeatures); zprop_register_number(ZFS_PROP_OBJSETID, "objsetid", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "OBJSETID", B_TRUE, sfeatures); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, " | none", "QUOTA", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, " | none", "RESERV", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT, ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "", "VOLSIZE", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, " | none", "REFQUOTA", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, " | none", "REFRESERV", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_FILESYSTEM_LIMIT, "filesystem_limit", UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, " | none", "FSLIMIT", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_SNAPSHOT_LIMIT, "snapshot_limit", UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, " | none", "SSLIMIT", B_FALSE, sfeatures); /* inherit number properties */ zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize", SPA_OLD_MAXBLOCKSIZE, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "512 to 1M, power of 2", "RECSIZE", B_FALSE, sfeatures); zprop_register_number(ZFS_PROP_SPECIAL_SMALL_BLOCKS, "special_small_blocks", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "zero or 512 to 1M, power of 2", "SPECIAL_SMALL_BLOCKS", B_FALSE, sfeatures); /* hidden properties */ zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES", B_FALSE, sfeatures); zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "NAME", B_TRUE, sfeatures); zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS", B_TRUE, sfeatures); zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu", PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "STMF_SBD_LU", B_TRUE, sfeatures); zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "USERACCOUNTING", B_FALSE, sfeatures); zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE", B_FALSE, sfeatures); zprop_register_hidden(ZFS_PROP_INCONSISTENT, "inconsistent", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT", B_FALSE, sfeatures); zprop_register_hidden(ZFS_PROP_IVSET_GUID, "ivsetguid", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "IVSETGUID", B_TRUE, sfeatures); zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP", B_TRUE, sfeatures); zprop_register_hidden(ZFS_PROP_PBKDF2_SALT, "pbkdf2salt", PROP_TYPE_NUMBER, PROP_ONETIME_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PBKDF2SALT", B_FALSE, sfeatures); zprop_register_hidden(ZFS_PROP_KEY_GUID, "keyguid", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "KEYGUID", B_TRUE, sfeatures); zprop_register_hidden(ZFS_PROP_REDACTED, "redacted", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "REDACTED", B_FALSE, sfeatures); /* * Properties that are obsolete and not used. These are retained so * that we don't have to change the values of the zfs_prop_t enum, or * have NULL pointers in the zfs_prop_table[]. */ zprop_register_hidden(ZFS_PROP_REMAPTXG, "remaptxg", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "REMAPTXG", B_FALSE, sfeatures); /* oddball properties */ /* 'creation' is a number but displayed as human-readable => flex */ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "", "CREATION", B_FALSE, B_TRUE, B_TRUE, NULL, sfeatures); zprop_register_impl(ZFS_PROP_SNAPSHOTS_CHANGED, "snapshots_changed", PROP_TYPE_NUMBER, 0, NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "SNAPSHOTS_CHANGED", B_FALSE, B_TRUE, B_TRUE, NULL, sfeatures); zfs_mod_list_supported_free(sfeatures); } boolean_t zfs_prop_delegatable(zfs_prop_t prop) { zprop_desc_t *pd = &zfs_prop_table[prop]; /* The mlslabel property is never delegatable. */ if (prop == ZFS_PROP_MLSLABEL) return (B_FALSE); return (pd->pd_attr != PROP_READONLY); } /* * Given a zfs dataset property name, returns the corresponding property ID. */ zfs_prop_t zfs_name_to_prop(const char *propname) { return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET)); } /* * Returns true if this is a valid user-defined property (one with a ':'). */ boolean_t zfs_prop_user(const char *name) { int i; char c; boolean_t foundsep = B_FALSE; for (i = 0; i < strlen(name); i++) { c = name[i]; if (!zprop_valid_char(c)) return (B_FALSE); if (c == ':') foundsep = B_TRUE; } if (!foundsep) return (B_FALSE); return (B_TRUE); } /* * Returns true if this is a valid userspace-type property (one with a '@'). * Note that after the @, any character is valid (eg, another @, for SID * user@domain). */ boolean_t zfs_prop_userquota(const char *name) { zfs_userquota_prop_t prop; for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) { if (strncmp(name, zfs_userquota_prop_prefixes[prop], strlen(zfs_userquota_prop_prefixes[prop])) == 0) { return (B_TRUE); } } return (B_FALSE); } /* * Returns true if this is a valid written@ property. * Note that after the @, any character is valid (eg, another @, for * written@pool/fs@origin). */ boolean_t zfs_prop_written(const char *name) { static const char *prop_prefix = "written@"; static const char *book_prefix = "written#"; return (strncmp(name, prop_prefix, strlen(prop_prefix)) == 0 || strncmp(name, book_prefix, strlen(book_prefix)) == 0); } /* * Tables of index types, plus functions to convert between the user view * (strings) and internal representation (uint64_t). */ int zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index) { return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET)); } int zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string) { return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET)); } uint64_t zfs_prop_random_value(zfs_prop_t prop, uint64_t seed) { return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET)); } /* * Returns TRUE if the property applies to any of the given dataset types. */ boolean_t zfs_prop_valid_for_type(int prop, zfs_type_t types, boolean_t headcheck) { return (zprop_valid_for_type(prop, types, headcheck)); } zprop_type_t zfs_prop_get_type(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_proptype); } /* * Returns TRUE if the property is readonly. */ boolean_t zfs_prop_readonly(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_READONLY || zfs_prop_table[prop].pd_attr == PROP_ONETIME || zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT); } /* * Returns TRUE if the property is visible (not hidden). */ boolean_t zfs_prop_visible(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_visible && zfs_prop_table[prop].pd_zfs_mod_supported); } /* * Returns TRUE if the property is only allowed to be set once. */ boolean_t zfs_prop_setonce(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_ONETIME || zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT); } const char * zfs_prop_default_string(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_strdefault); } uint64_t zfs_prop_default_numeric(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_numdefault); } /* * Given a dataset property ID, returns the corresponding name. * Assuming the zfs dataset property ID is valid. */ const char * zfs_prop_to_name(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_name); } /* * Returns TRUE if the property is inheritable. */ boolean_t zfs_prop_inheritable(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_INHERIT || zfs_prop_table[prop].pd_attr == PROP_ONETIME); } /* * Returns TRUE if property is one of the encryption properties that requires * a loaded encryption key to modify. */ boolean_t zfs_prop_encryption_key_param(zfs_prop_t prop) { /* * keylocation does not count as an encryption property. It can be * changed at will without needing the master keys. */ return (prop == ZFS_PROP_PBKDF2_SALT || prop == ZFS_PROP_PBKDF2_ITERS || prop == ZFS_PROP_KEYFORMAT); } /* * Helper function used by both kernelspace and userspace to check the * keylocation property. If encrypted is set, the keylocation must be valid * for an encrypted dataset. */ boolean_t zfs_prop_valid_keylocation(const char *str, boolean_t encrypted) { if (strcmp("none", str) == 0) return (!encrypted); else if (strcmp("prompt", str) == 0) return (B_TRUE); else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0) return (B_TRUE); else if (strlen(str) > 8 && strncmp("https://", str, 8) == 0) return (B_TRUE); else if (strlen(str) > 7 && strncmp("http://", str, 7) == 0) return (B_TRUE); return (B_FALSE); } #ifndef _KERNEL #include /* * Returns a string describing the set of acceptable values for the given * zfs property, or NULL if it cannot be set. */ const char * zfs_prop_values(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_values); } /* * Returns TRUE if this property is a string type. Note that index types * (compression, checksum) are treated as strings in userland, even though they * are stored numerically on disk. */ int zfs_prop_is_string(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING || zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX); } /* * Returns the column header for the given property. Used only in * 'zfs list -o', but centralized here with the other property information. */ const char * zfs_prop_column_name(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_colname); } /* * Returns whether the given property should be displayed right-justified for * 'zfs list'. */ boolean_t zfs_prop_align_right(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_rightalign); } #endif #if defined(_KERNEL) #include #if defined(HAVE_KERNEL_FPU_INTERNAL) uint8_t **zfs_kfpu_fpregs; EXPORT_SYMBOL(zfs_kfpu_fpregs); #endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ extern int __init zcommon_init(void); extern void zcommon_fini(void); int __init zcommon_init(void) { int error = kfpu_init(); if (error) return (error); fletcher_4_init(); return (0); } void zcommon_fini(void) { fletcher_4_fini(); kfpu_fini(); } #ifdef __FreeBSD__ module_init_early(zcommon_init); module_exit(zcommon_fini); #endif #endif /* zfs dataset property functions */ EXPORT_SYMBOL(zfs_userquota_prop_prefixes); EXPORT_SYMBOL(zfs_prop_init); EXPORT_SYMBOL(zfs_prop_get_type); EXPORT_SYMBOL(zfs_prop_get_table); EXPORT_SYMBOL(zfs_prop_delegatable); EXPORT_SYMBOL(zfs_prop_visible); /* Dataset property functions shared between libzfs and kernel. */ EXPORT_SYMBOL(zfs_prop_default_string); EXPORT_SYMBOL(zfs_prop_default_numeric); EXPORT_SYMBOL(zfs_prop_readonly); EXPORT_SYMBOL(zfs_prop_inheritable); EXPORT_SYMBOL(zfs_prop_encryption_key_param); EXPORT_SYMBOL(zfs_prop_valid_keylocation); EXPORT_SYMBOL(zfs_prop_setonce); EXPORT_SYMBOL(zfs_prop_to_name); EXPORT_SYMBOL(zfs_name_to_prop); EXPORT_SYMBOL(zfs_prop_user); EXPORT_SYMBOL(zfs_prop_userquota); EXPORT_SYMBOL(zfs_prop_index_to_string); EXPORT_SYMBOL(zfs_prop_string_to_index); EXPORT_SYMBOL(zfs_prop_valid_for_type); EXPORT_SYMBOL(zfs_prop_written); diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c index 25cb4f6ab1ad..ce2e6ce742a2 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c +++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c @@ -1,2861 +1,2861 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. * * CDDL HEADER END */ /* * Copyright (c) 2017, Datto, Inc. All rights reserved. * Copyright (c) 2018 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include /* * This file's primary purpose is for managing master encryption keys in * memory and on disk. For more info on how these keys are used, see the * block comment in zio_crypt.c. * * All master keys are stored encrypted on disk in the form of the DSL * Crypto Key ZAP object. The binary key data in this object is always * randomly generated and is encrypted with the user's wrapping key. This * layer of indirection allows the user to change their key without * needing to re-encrypt the entire dataset. The ZAP also holds on to the * (non-encrypted) encryption algorithm identifier, IV, and MAC needed to * safely decrypt the master key. For more info on the user's key see the * block comment in libzfs_crypto.c * * In-memory encryption keys are managed through the spa_keystore. The * keystore consists of 3 AVL trees, which are as follows: * * The Wrapping Key Tree: * The wrapping key (wkey) tree stores the user's keys that are fed into the * kernel through 'zfs load-key' and related commands. Datasets inherit their * parent's wkey by default, so these structures are refcounted. The wrapping * keys remain in memory until they are explicitly unloaded (with * "zfs unload-key"). Unloading is only possible when no datasets are using * them (refcount=0). * * The DSL Crypto Key Tree: * The DSL Crypto Keys (DCK) are the in-memory representation of decrypted * master keys. They are used by the functions in zio_crypt.c to perform * encryption, decryption, and authentication. Snapshots and clones of a given * dataset will share a DSL Crypto Key, so they are also refcounted. Once the * refcount on a key hits zero, it is immediately zeroed out and freed. * * The Crypto Key Mapping Tree: * The zio layer needs to lookup master keys by their dataset object id. Since * the DSL Crypto Keys can belong to multiple datasets, we maintain a tree of * dsl_key_mapping_t's which essentially just map the dataset object id to its * appropriate DSL Crypto Key. The management for creating and destroying these * mappings hooks into the code for owning and disowning datasets. Usually, * there will only be one active dataset owner, but there are times * (particularly during dataset creation and destruction) when this may not be * true or the dataset may not be initialized enough to own. As a result, this * object is also refcounted. */ /* * This tunable allows datasets to be raw received even if the stream does * not include IVset guids or if the guids don't match. This is used as part * of the resolution for ZPOOL_ERRATA_ZOL_8308_ENCRYPTION. */ int zfs_disable_ivset_guid_check = 0; static void dsl_wrapping_key_hold(dsl_wrapping_key_t *wkey, const void *tag) { (void) zfs_refcount_add(&wkey->wk_refcnt, tag); } static void dsl_wrapping_key_rele(dsl_wrapping_key_t *wkey, const void *tag) { (void) zfs_refcount_remove(&wkey->wk_refcnt, tag); } static void dsl_wrapping_key_free(dsl_wrapping_key_t *wkey) { ASSERT0(zfs_refcount_count(&wkey->wk_refcnt)); if (wkey->wk_key.ck_data) { memset(wkey->wk_key.ck_data, 0, CRYPTO_BITS2BYTES(wkey->wk_key.ck_length)); kmem_free(wkey->wk_key.ck_data, CRYPTO_BITS2BYTES(wkey->wk_key.ck_length)); } zfs_refcount_destroy(&wkey->wk_refcnt); kmem_free(wkey, sizeof (dsl_wrapping_key_t)); } static void dsl_wrapping_key_create(uint8_t *wkeydata, zfs_keyformat_t keyformat, uint64_t salt, uint64_t iters, dsl_wrapping_key_t **wkey_out) { dsl_wrapping_key_t *wkey; /* allocate the wrapping key */ wkey = kmem_alloc(sizeof (dsl_wrapping_key_t), KM_SLEEP); /* allocate and initialize the underlying crypto key */ wkey->wk_key.ck_data = kmem_alloc(WRAPPING_KEY_LEN, KM_SLEEP); wkey->wk_key.ck_length = CRYPTO_BYTES2BITS(WRAPPING_KEY_LEN); memcpy(wkey->wk_key.ck_data, wkeydata, WRAPPING_KEY_LEN); /* initialize the rest of the struct */ zfs_refcount_create(&wkey->wk_refcnt); wkey->wk_keyformat = keyformat; wkey->wk_salt = salt; wkey->wk_iters = iters; *wkey_out = wkey; } int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out) { int ret; uint64_t crypt = ZIO_CRYPT_INHERIT; uint64_t keyformat = ZFS_KEYFORMAT_NONE; uint64_t salt = 0, iters = 0; dsl_crypto_params_t *dcp = NULL; dsl_wrapping_key_t *wkey = NULL; uint8_t *wkeydata = NULL; uint_t wkeydata_len = 0; char *keylocation = NULL; dcp = kmem_zalloc(sizeof (dsl_crypto_params_t), KM_SLEEP); dcp->cp_cmd = cmd; /* get relevant arguments from the nvlists */ if (props != NULL) { (void) nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt); (void) nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); (void) nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); (void) nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), &salt); (void) nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); dcp->cp_crypt = crypt; } if (crypto_args != NULL) { (void) nvlist_lookup_uint8_array(crypto_args, "wkeydata", &wkeydata, &wkeydata_len); } /* check for valid command */ if (dcp->cp_cmd >= DCP_CMD_MAX) { ret = SET_ERROR(EINVAL); goto error; } else { dcp->cp_cmd = cmd; } /* check for valid crypt */ if (dcp->cp_crypt >= ZIO_CRYPT_FUNCTIONS) { ret = SET_ERROR(EINVAL); goto error; } else { dcp->cp_crypt = crypt; } /* check for valid keyformat */ if (keyformat >= ZFS_KEYFORMAT_FORMATS) { ret = SET_ERROR(EINVAL); goto error; } /* check for a valid keylocation (of any kind) and copy it in */ if (keylocation != NULL) { if (!zfs_prop_valid_keylocation(keylocation, B_FALSE)) { ret = SET_ERROR(EINVAL); goto error; } dcp->cp_keylocation = spa_strdup(keylocation); } /* check wrapping key length, if given */ if (wkeydata != NULL && wkeydata_len != WRAPPING_KEY_LEN) { ret = SET_ERROR(EINVAL); goto error; } /* if the user asked for the default crypt, determine that now */ if (dcp->cp_crypt == ZIO_CRYPT_ON) dcp->cp_crypt = ZIO_CRYPT_ON_VALUE; /* create the wrapping key from the raw data */ if (wkeydata != NULL) { /* create the wrapping key with the verified parameters */ dsl_wrapping_key_create(wkeydata, keyformat, salt, iters, &wkey); dcp->cp_wkey = wkey; } /* * Remove the encryption properties from the nvlist since they are not * maintained through the DSL. */ (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)); (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT)); (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT)); (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS)); *dcp_out = dcp; return (0); error: kmem_free(dcp, sizeof (dsl_crypto_params_t)); *dcp_out = NULL; return (ret); } void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload) { if (dcp == NULL) return; if (dcp->cp_keylocation != NULL) spa_strfree(dcp->cp_keylocation); if (unload && dcp->cp_wkey != NULL) dsl_wrapping_key_free(dcp->cp_wkey); kmem_free(dcp, sizeof (dsl_crypto_params_t)); } static int spa_crypto_key_compare(const void *a, const void *b) { const dsl_crypto_key_t *dcka = a; const dsl_crypto_key_t *dckb = b; if (dcka->dck_obj < dckb->dck_obj) return (-1); if (dcka->dck_obj > dckb->dck_obj) return (1); return (0); } static int spa_key_mapping_compare(const void *a, const void *b) { const dsl_key_mapping_t *kma = a; const dsl_key_mapping_t *kmb = b; if (kma->km_dsobj < kmb->km_dsobj) return (-1); if (kma->km_dsobj > kmb->km_dsobj) return (1); return (0); } static int spa_wkey_compare(const void *a, const void *b) { const dsl_wrapping_key_t *wka = a; const dsl_wrapping_key_t *wkb = b; if (wka->wk_ddobj < wkb->wk_ddobj) return (-1); if (wka->wk_ddobj > wkb->wk_ddobj) return (1); return (0); } void spa_keystore_init(spa_keystore_t *sk) { rw_init(&sk->sk_dk_lock, NULL, RW_DEFAULT, NULL); rw_init(&sk->sk_km_lock, NULL, RW_DEFAULT, NULL); rw_init(&sk->sk_wkeys_lock, NULL, RW_DEFAULT, NULL); avl_create(&sk->sk_dsl_keys, spa_crypto_key_compare, sizeof (dsl_crypto_key_t), offsetof(dsl_crypto_key_t, dck_avl_link)); avl_create(&sk->sk_key_mappings, spa_key_mapping_compare, sizeof (dsl_key_mapping_t), offsetof(dsl_key_mapping_t, km_avl_link)); avl_create(&sk->sk_wkeys, spa_wkey_compare, sizeof (dsl_wrapping_key_t), offsetof(dsl_wrapping_key_t, wk_avl_link)); } void spa_keystore_fini(spa_keystore_t *sk) { dsl_wrapping_key_t *wkey; void *cookie = NULL; ASSERT(avl_is_empty(&sk->sk_dsl_keys)); ASSERT(avl_is_empty(&sk->sk_key_mappings)); while ((wkey = avl_destroy_nodes(&sk->sk_wkeys, &cookie)) != NULL) dsl_wrapping_key_free(wkey); avl_destroy(&sk->sk_wkeys); avl_destroy(&sk->sk_key_mappings); avl_destroy(&sk->sk_dsl_keys); rw_destroy(&sk->sk_wkeys_lock); rw_destroy(&sk->sk_km_lock); rw_destroy(&sk->sk_dk_lock); } static int dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) { if (dd->dd_crypto_obj == 0) return (SET_ERROR(ENOENT)); return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj)); } static int dsl_dir_get_encryption_version(dsl_dir_t *dd, uint64_t *version) { *version = 0; if (dd->dd_crypto_obj == 0) return (SET_ERROR(ENOENT)); /* version 0 is implied by ENOENT */ (void) zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION, 8, 1, version); return (0); } boolean_t dsl_dir_incompatible_encryption_version(dsl_dir_t *dd) { int ret; uint64_t version = 0; ret = dsl_dir_get_encryption_version(dd, &version); if (ret != 0) return (B_FALSE); return (version != ZIO_CRYPT_KEY_CURRENT_VERSION); } static int spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj, const void *tag, dsl_wrapping_key_t **wkey_out) { int ret; dsl_wrapping_key_t search_wkey; dsl_wrapping_key_t *found_wkey; ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_wkeys_lock)); /* init the search wrapping key */ search_wkey.wk_ddobj = ddobj; /* lookup the wrapping key */ found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &search_wkey, NULL); if (!found_wkey) { ret = SET_ERROR(ENOENT); goto error; } /* increment the refcount */ dsl_wrapping_key_hold(found_wkey, tag); *wkey_out = found_wkey; return (0); error: *wkey_out = NULL; return (ret); } static int spa_keystore_wkey_hold_dd(spa_t *spa, dsl_dir_t *dd, const void *tag, dsl_wrapping_key_t **wkey_out) { int ret; dsl_wrapping_key_t *wkey; uint64_t rddobj; boolean_t locked = B_FALSE; if (!RW_WRITE_HELD(&spa->spa_keystore.sk_wkeys_lock)) { rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_READER); locked = B_TRUE; } /* get the ddobj that the keylocation property was inherited from */ ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); if (ret != 0) goto error; /* lookup the wkey in the avl tree */ ret = spa_keystore_wkey_hold_ddobj_impl(spa, rddobj, tag, &wkey); if (ret != 0) goto error; /* unlock the wkey tree if we locked it */ if (locked) rw_exit(&spa->spa_keystore.sk_wkeys_lock); *wkey_out = wkey; return (0); error: if (locked) rw_exit(&spa->spa_keystore.sk_wkeys_lock); *wkey_out = NULL; return (ret); } int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation) { int ret = 0; dsl_dir_t *dd = NULL; dsl_pool_t *dp = NULL; uint64_t rddobj; /* hold the dsl dir */ ret = dsl_pool_hold(dsname, FTAG, &dp); if (ret != 0) goto out; ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); if (ret != 0) { dd = NULL; goto out; } /* if dd is not encrypted, the value may only be "none" */ if (dd->dd_crypto_obj == 0) { if (strcmp(keylocation, "none") != 0) { ret = SET_ERROR(EACCES); goto out; } ret = 0; goto out; } /* check for a valid keylocation for encrypted datasets */ if (!zfs_prop_valid_keylocation(keylocation, B_TRUE)) { ret = SET_ERROR(EINVAL); goto out; } /* check that this is an encryption root */ ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); if (ret != 0) goto out; if (rddobj != dd->dd_object) { ret = SET_ERROR(EACCES); goto out; } dsl_dir_rele(dd, FTAG); dsl_pool_rele(dp, FTAG); return (0); out: if (dd != NULL) dsl_dir_rele(dd, FTAG); if (dp != NULL) dsl_pool_rele(dp, FTAG); return (ret); } static void dsl_crypto_key_free(dsl_crypto_key_t *dck) { ASSERT(zfs_refcount_count(&dck->dck_holds) == 0); /* destroy the zio_crypt_key_t */ zio_crypt_key_destroy(&dck->dck_key); /* free the refcount, wrapping key, and lock */ zfs_refcount_destroy(&dck->dck_holds); if (dck->dck_wkey) dsl_wrapping_key_rele(dck->dck_wkey, dck); /* free the key */ kmem_free(dck, sizeof (dsl_crypto_key_t)); } static void dsl_crypto_key_rele(dsl_crypto_key_t *dck, const void *tag) { if (zfs_refcount_remove(&dck->dck_holds, tag) == 0) dsl_crypto_key_free(dck); } static int dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, uint64_t dckobj, const void *tag, dsl_crypto_key_t **dck_out) { int ret; uint64_t crypt = 0, guid = 0, version = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; uint8_t mac[WRAPPING_MAC_LEN]; dsl_crypto_key_t *dck; /* allocate and initialize the key */ dck = kmem_zalloc(sizeof (dsl_crypto_key_t), KM_SLEEP); /* fetch all of the values we need from the ZAP */ ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, &crypt); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &guid); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, MASTER_KEY_MAX_LEN, raw_keydata); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, SHA512_HMAC_KEYLEN, raw_hmac_keydata); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, iv); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, mac); if (ret != 0) goto error; /* the initial on-disk format for encryption did not have a version */ (void) zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); /* * Unwrap the keys. If there is an error return EACCES to indicate * an authentication failure. */ ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, version, guid, raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key); if (ret != 0) { ret = SET_ERROR(EACCES); goto error; } /* finish initializing the dsl_crypto_key_t */ zfs_refcount_create(&dck->dck_holds); dsl_wrapping_key_hold(wkey, dck); dck->dck_wkey = wkey; dck->dck_obj = dckobj; zfs_refcount_add(&dck->dck_holds, tag); *dck_out = dck; return (0); error: if (dck != NULL) { memset(dck, 0, sizeof (dsl_crypto_key_t)); kmem_free(dck, sizeof (dsl_crypto_key_t)); } *dck_out = NULL; return (ret); } static int spa_keystore_dsl_key_hold_impl(spa_t *spa, uint64_t dckobj, const void *tag, dsl_crypto_key_t **dck_out) { int ret; dsl_crypto_key_t search_dck; dsl_crypto_key_t *found_dck; ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_dk_lock)); /* init the search key */ search_dck.dck_obj = dckobj; /* find the matching key in the keystore */ found_dck = avl_find(&spa->spa_keystore.sk_dsl_keys, &search_dck, NULL); if (!found_dck) { ret = SET_ERROR(ENOENT); goto error; } /* increment the refcount */ zfs_refcount_add(&found_dck->dck_holds, tag); *dck_out = found_dck; return (0); error: *dck_out = NULL; return (ret); } static int spa_keystore_dsl_key_hold_dd(spa_t *spa, dsl_dir_t *dd, const void *tag, dsl_crypto_key_t **dck_out) { int ret; avl_index_t where; dsl_crypto_key_t *dck_io = NULL, *dck_ks = NULL; dsl_wrapping_key_t *wkey = NULL; uint64_t dckobj = dd->dd_crypto_obj; /* Lookup the key in the tree of currently loaded keys */ rw_enter(&spa->spa_keystore.sk_dk_lock, RW_READER); ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck_ks); rw_exit(&spa->spa_keystore.sk_dk_lock); if (ret == 0) { *dck_out = dck_ks; return (0); } /* Lookup the wrapping key from the keystore */ ret = spa_keystore_wkey_hold_dd(spa, dd, FTAG, &wkey); if (ret != 0) { *dck_out = NULL; return (SET_ERROR(EACCES)); } /* Read the key from disk */ ret = dsl_crypto_key_open(spa->spa_meta_objset, wkey, dckobj, tag, &dck_io); if (ret != 0) { dsl_wrapping_key_rele(wkey, FTAG); *dck_out = NULL; return (ret); } /* * Add the key to the keystore. It may already exist if it was * added while performing the read from disk. In this case discard * it and return the key from the keystore. */ rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck_ks); if (ret != 0) { avl_find(&spa->spa_keystore.sk_dsl_keys, dck_io, &where); avl_insert(&spa->spa_keystore.sk_dsl_keys, dck_io, where); *dck_out = dck_io; } else { dsl_crypto_key_free(dck_io); *dck_out = dck_ks; } /* Release the wrapping key (the dsl key now has a reference to it) */ dsl_wrapping_key_rele(wkey, FTAG); rw_exit(&spa->spa_keystore.sk_dk_lock); return (0); } void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, const void *tag) { rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); if (zfs_refcount_remove(&dck->dck_holds, tag) == 0) { avl_remove(&spa->spa_keystore.sk_dsl_keys, dck); dsl_crypto_key_free(dck); } rw_exit(&spa->spa_keystore.sk_dk_lock); } int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey) { int ret; avl_index_t where; dsl_wrapping_key_t *found_wkey; rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); /* insert the wrapping key into the keystore */ found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); if (found_wkey != NULL) { ret = SET_ERROR(EEXIST); goto error_unlock; } avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); rw_exit(&spa->spa_keystore.sk_wkeys_lock); return (0); error_unlock: rw_exit(&spa->spa_keystore.sk_wkeys_lock); return (ret); } int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, boolean_t noop) { int ret; dsl_dir_t *dd = NULL; dsl_crypto_key_t *dck = NULL; dsl_wrapping_key_t *wkey = dcp->cp_wkey; dsl_pool_t *dp = NULL; uint64_t rddobj, keyformat, salt, iters; /* * We don't validate the wrapping key's keyformat, salt, or iters * since they will never be needed after the DCK has been wrapped. */ if (dcp->cp_wkey == NULL || dcp->cp_cmd != DCP_CMD_NONE || dcp->cp_crypt != ZIO_CRYPT_INHERIT || dcp->cp_keylocation != NULL) return (SET_ERROR(EINVAL)); ret = dsl_pool_hold(dsname, FTAG, &dp); if (ret != 0) goto error; if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { ret = SET_ERROR(ENOTSUP); goto error; } /* hold the dsl dir */ ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); if (ret != 0) { dd = NULL; goto error; } /* confirm that dd is the encryption root */ ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); if (ret != 0 || rddobj != dd->dd_object) { ret = SET_ERROR(EINVAL); goto error; } /* initialize the wkey's ddobj */ wkey->wk_ddobj = dd->dd_object; /* verify that the wkey is correct by opening its dsl key */ ret = dsl_crypto_key_open(dp->dp_meta_objset, wkey, dd->dd_crypto_obj, FTAG, &dck); if (ret != 0) goto error; /* initialize the wkey encryption parameters from the DSL Crypto Key */ ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &keyformat); if (ret != 0) goto error; ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt); if (ret != 0) goto error; ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters); if (ret != 0) goto error; ASSERT3U(keyformat, <, ZFS_KEYFORMAT_FORMATS); ASSERT3U(keyformat, !=, ZFS_KEYFORMAT_NONE); IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, iters != 0); IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, salt != 0); IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, iters == 0); IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, salt == 0); wkey->wk_keyformat = keyformat; wkey->wk_salt = salt; wkey->wk_iters = iters; /* * At this point we have verified the wkey and confirmed that it can * be used to decrypt a DSL Crypto Key. We can simply cleanup and * return if this is all the user wanted to do. */ if (noop) goto error; /* insert the wrapping key into the keystore */ ret = spa_keystore_load_wkey_impl(dp->dp_spa, wkey); if (ret != 0) goto error; dsl_crypto_key_rele(dck, FTAG); dsl_dir_rele(dd, FTAG); dsl_pool_rele(dp, FTAG); /* create any zvols under this ds */ zvol_create_minors_recursive(dsname); return (0); error: if (dck != NULL) dsl_crypto_key_rele(dck, FTAG); if (dd != NULL) dsl_dir_rele(dd, FTAG); if (dp != NULL) dsl_pool_rele(dp, FTAG); return (ret); } int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj) { int ret; dsl_wrapping_key_t search_wkey; dsl_wrapping_key_t *found_wkey; /* init the search wrapping key */ search_wkey.wk_ddobj = ddobj; rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); /* remove the wrapping key from the keystore */ found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &search_wkey, NULL); if (!found_wkey) { ret = SET_ERROR(EACCES); goto error_unlock; } else if (zfs_refcount_count(&found_wkey->wk_refcnt) != 0) { ret = SET_ERROR(EBUSY); goto error_unlock; } avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey); rw_exit(&spa->spa_keystore.sk_wkeys_lock); /* free the wrapping key */ dsl_wrapping_key_free(found_wkey); return (0); error_unlock: rw_exit(&spa->spa_keystore.sk_wkeys_lock); return (ret); } int spa_keystore_unload_wkey(const char *dsname) { int ret = 0; dsl_dir_t *dd = NULL; dsl_pool_t *dp = NULL; spa_t *spa = NULL; ret = spa_open(dsname, &spa, FTAG); if (ret != 0) return (ret); /* * Wait for any outstanding txg IO to complete, releasing any * remaining references on the wkey. */ if (spa_mode(spa) != SPA_MODE_READ) txg_wait_synced(spa->spa_dsl_pool, 0); spa_close(spa, FTAG); /* hold the dsl dir */ ret = dsl_pool_hold(dsname, FTAG, &dp); if (ret != 0) goto error; if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { ret = (SET_ERROR(ENOTSUP)); goto error; } ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); if (ret != 0) { dd = NULL; goto error; } /* unload the wkey */ ret = spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); if (ret != 0) goto error; dsl_dir_rele(dd, FTAG); dsl_pool_rele(dp, FTAG); /* remove any zvols under this ds */ zvol_remove_minors(dp->dp_spa, dsname, B_TRUE); return (0); error: if (dd != NULL) dsl_dir_rele(dd, FTAG); if (dp != NULL) dsl_pool_rele(dp, FTAG); return (ret); } void key_mapping_add_ref(dsl_key_mapping_t *km, const void *tag) { ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1); zfs_refcount_add(&km->km_refcnt, tag); } /* * The locking here is a little tricky to ensure we don't cause unnecessary * performance problems. We want to release a key mapping whenever someone * decrements the refcount to 0, but freeing the mapping requires removing * it from the spa_keystore, which requires holding sk_km_lock as a writer. * Most of the time we don't want to hold this lock as a writer, since the * same lock is held as a reader for each IO that needs to encrypt / decrypt * data for any dataset and in practice we will only actually free the * mapping after unmounting a dataset. */ void key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, const void *tag) { ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1); if (zfs_refcount_remove(&km->km_refcnt, tag) != 0) return; /* * We think we are going to need to free the mapping. Add a * reference to prevent most other releasers from thinking * this might be their responsibility. This is inherently * racy, so we will confirm that we are legitimately the * last holder once we have the sk_km_lock as a writer. */ zfs_refcount_add(&km->km_refcnt, FTAG); rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER); if (zfs_refcount_remove(&km->km_refcnt, FTAG) != 0) { rw_exit(&spa->spa_keystore.sk_km_lock); return; } avl_remove(&spa->spa_keystore.sk_key_mappings, km); rw_exit(&spa->spa_keystore.sk_km_lock); spa_keystore_dsl_key_rele(spa, km->km_key, km); zfs_refcount_destroy(&km->km_refcnt); kmem_free(km, sizeof (dsl_key_mapping_t)); } int spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, const void *tag, dsl_key_mapping_t **km_out) { int ret; avl_index_t where; dsl_key_mapping_t *km, *found_km; boolean_t should_free = B_FALSE; /* Allocate and initialize the mapping */ km = kmem_zalloc(sizeof (dsl_key_mapping_t), KM_SLEEP); zfs_refcount_create(&km->km_refcnt); ret = spa_keystore_dsl_key_hold_dd(spa, ds->ds_dir, km, &km->km_key); if (ret != 0) { zfs_refcount_destroy(&km->km_refcnt); kmem_free(km, sizeof (dsl_key_mapping_t)); if (km_out != NULL) *km_out = NULL; return (ret); } km->km_dsobj = ds->ds_object; rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER); /* * If a mapping already exists, simply increment its refcount and * cleanup the one we made. We want to allocate / free outside of * the lock because this lock is also used by the zio layer to lookup * key mappings. Otherwise, use the one we created. Normally, there will * only be one active reference at a time (the objset owner), but there * are times when there could be multiple async users. */ found_km = avl_find(&spa->spa_keystore.sk_key_mappings, km, &where); if (found_km != NULL) { should_free = B_TRUE; zfs_refcount_add(&found_km->km_refcnt, tag); if (km_out != NULL) *km_out = found_km; } else { zfs_refcount_add(&km->km_refcnt, tag); avl_insert(&spa->spa_keystore.sk_key_mappings, km, where); if (km_out != NULL) *km_out = km; } rw_exit(&spa->spa_keystore.sk_km_lock); if (should_free) { spa_keystore_dsl_key_rele(spa, km->km_key, km); zfs_refcount_destroy(&km->km_refcnt); kmem_free(km, sizeof (dsl_key_mapping_t)); } return (0); } int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, const void *tag) { int ret; dsl_key_mapping_t search_km; dsl_key_mapping_t *found_km; /* init the search key mapping */ search_km.km_dsobj = dsobj; rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER); /* find the matching mapping */ found_km = avl_find(&spa->spa_keystore.sk_key_mappings, &search_km, NULL); if (found_km == NULL) { ret = SET_ERROR(ENOENT); goto error_unlock; } rw_exit(&spa->spa_keystore.sk_km_lock); key_mapping_rele(spa, found_km, tag); return (0); error_unlock: rw_exit(&spa->spa_keystore.sk_km_lock); return (ret); } /* * This function is primarily used by the zio and arc layer to lookup * DSL Crypto Keys for encryption. Callers must release the key with * spa_keystore_dsl_key_rele(). The function may also be called with * dck_out == NULL and tag == NULL to simply check that a key exists * without getting a reference to it. */ int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, const void *tag, dsl_crypto_key_t **dck_out) { int ret; dsl_key_mapping_t search_km; dsl_key_mapping_t *found_km; ASSERT((tag != NULL && dck_out != NULL) || (tag == NULL && dck_out == NULL)); /* init the search key mapping */ search_km.km_dsobj = dsobj; rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER); /* remove the mapping from the tree */ found_km = avl_find(&spa->spa_keystore.sk_key_mappings, &search_km, NULL); if (found_km == NULL) { ret = SET_ERROR(ENOENT); goto error_unlock; } if (found_km && tag) zfs_refcount_add(&found_km->km_key->dck_holds, tag); rw_exit(&spa->spa_keystore.sk_km_lock); if (dck_out != NULL) *dck_out = found_km->km_key; return (0); error_unlock: rw_exit(&spa->spa_keystore.sk_km_lock); if (dck_out != NULL) *dck_out = NULL; return (ret); } static int dmu_objset_check_wkey_loaded(dsl_dir_t *dd) { int ret; dsl_wrapping_key_t *wkey = NULL; ret = spa_keystore_wkey_hold_dd(dd->dd_pool->dp_spa, dd, FTAG, &wkey); if (ret != 0) return (SET_ERROR(EACCES)); dsl_wrapping_key_rele(wkey, FTAG); return (0); } -static zfs_keystatus_t +zfs_keystatus_t dsl_dataset_get_keystatus(dsl_dir_t *dd) { /* check if this dd has a has a dsl key */ if (dd->dd_crypto_obj == 0) return (ZFS_KEYSTATUS_NONE); return (dmu_objset_check_wkey_loaded(dd) == 0 ? ZFS_KEYSTATUS_AVAILABLE : ZFS_KEYSTATUS_UNAVAILABLE); } static int dsl_dir_get_crypt(dsl_dir_t *dd, uint64_t *crypt) { if (dd->dd_crypto_obj == 0) { *crypt = ZIO_CRYPT_OFF; return (0); } return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, crypt)); } static void dsl_crypto_key_sync_impl(objset_t *mos, uint64_t dckobj, uint64_t crypt, uint64_t root_ddobj, uint64_t guid, uint8_t *iv, uint8_t *mac, uint8_t *keydata, uint8_t *hmac_keydata, uint64_t keyformat, uint64_t salt, uint64_t iters, dmu_tx_t *tx) { VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, &crypt, tx)); VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, &root_ddobj, tx)); VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &guid, tx)); VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, iv, tx)); VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, mac, tx)); VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, MASTER_KEY_MAX_LEN, keydata, tx)); VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, SHA512_HMAC_KEYLEN, hmac_keydata, tx)); VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &keyformat, tx)); VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt, tx)); VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters, tx)); } static void dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx) { zio_crypt_key_t *key = &dck->dck_key; dsl_wrapping_key_t *wkey = dck->dck_wkey; uint8_t keydata[MASTER_KEY_MAX_LEN]; uint8_t hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; uint8_t mac[WRAPPING_MAC_LEN]; ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(key->zk_crypt, <, ZIO_CRYPT_FUNCTIONS); /* encrypt and store the keys along with the IV and MAC */ VERIFY0(zio_crypt_key_wrap(&dck->dck_wkey->wk_key, key, iv, mac, keydata, hmac_keydata)); /* update the ZAP with the obtained values */ dsl_crypto_key_sync_impl(tx->tx_pool->dp_meta_objset, dck->dck_obj, key->zk_crypt, wkey->wk_ddobj, key->zk_guid, iv, mac, keydata, hmac_keydata, wkey->wk_keyformat, wkey->wk_salt, wkey->wk_iters, tx); } typedef struct spa_keystore_change_key_args { const char *skcka_dsname; dsl_crypto_params_t *skcka_cp; } spa_keystore_change_key_args_t; static int spa_keystore_change_key_check(void *arg, dmu_tx_t *tx) { int ret; dsl_dir_t *dd = NULL; dsl_pool_t *dp = dmu_tx_pool(tx); spa_keystore_change_key_args_t *skcka = arg; dsl_crypto_params_t *dcp = skcka->skcka_cp; uint64_t rddobj; /* check for the encryption feature */ if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { ret = SET_ERROR(ENOTSUP); goto error; } /* check for valid key change command */ if (dcp->cp_cmd != DCP_CMD_NEW_KEY && dcp->cp_cmd != DCP_CMD_INHERIT && dcp->cp_cmd != DCP_CMD_FORCE_NEW_KEY && dcp->cp_cmd != DCP_CMD_FORCE_INHERIT) { ret = SET_ERROR(EINVAL); goto error; } /* hold the dd */ ret = dsl_dir_hold(dp, skcka->skcka_dsname, FTAG, &dd, NULL); if (ret != 0) { dd = NULL; goto error; } /* verify that the dataset is encrypted */ if (dd->dd_crypto_obj == 0) { ret = SET_ERROR(EINVAL); goto error; } /* clones must always use their origin's key */ if (dsl_dir_is_clone(dd)) { ret = SET_ERROR(EINVAL); goto error; } /* lookup the ddobj we are inheriting the keylocation from */ ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); if (ret != 0) goto error; /* Handle inheritance */ if (dcp->cp_cmd == DCP_CMD_INHERIT || dcp->cp_cmd == DCP_CMD_FORCE_INHERIT) { /* no other encryption params should be given */ if (dcp->cp_crypt != ZIO_CRYPT_INHERIT || dcp->cp_keylocation != NULL || dcp->cp_wkey != NULL) { ret = SET_ERROR(EINVAL); goto error; } /* check that this is an encryption root */ if (dd->dd_object != rddobj) { ret = SET_ERROR(EINVAL); goto error; } /* check that the parent is encrypted */ if (dd->dd_parent->dd_crypto_obj == 0) { ret = SET_ERROR(EINVAL); goto error; } /* if we are rewrapping check that both keys are loaded */ if (dcp->cp_cmd == DCP_CMD_INHERIT) { ret = dmu_objset_check_wkey_loaded(dd); if (ret != 0) goto error; ret = dmu_objset_check_wkey_loaded(dd->dd_parent); if (ret != 0) goto error; } dsl_dir_rele(dd, FTAG); return (0); } /* handle forcing an encryption root without rewrapping */ if (dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) { /* no other encryption params should be given */ if (dcp->cp_crypt != ZIO_CRYPT_INHERIT || dcp->cp_keylocation != NULL || dcp->cp_wkey != NULL) { ret = SET_ERROR(EINVAL); goto error; } /* check that this is not an encryption root */ if (dd->dd_object == rddobj) { ret = SET_ERROR(EINVAL); goto error; } dsl_dir_rele(dd, FTAG); return (0); } /* crypt cannot be changed after creation */ if (dcp->cp_crypt != ZIO_CRYPT_INHERIT) { ret = SET_ERROR(EINVAL); goto error; } /* we are not inheritting our parent's wkey so we need one ourselves */ if (dcp->cp_wkey == NULL) { ret = SET_ERROR(EINVAL); goto error; } /* check for a valid keyformat for the new wrapping key */ if (dcp->cp_wkey->wk_keyformat >= ZFS_KEYFORMAT_FORMATS || dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_NONE) { ret = SET_ERROR(EINVAL); goto error; } /* * If this dataset is not currently an encryption root we need a new * keylocation for this dataset's new wrapping key. Otherwise we can * just keep the one we already had. */ if (dd->dd_object != rddobj && dcp->cp_keylocation == NULL) { ret = SET_ERROR(EINVAL); goto error; } /* check that the keylocation is valid if it is not NULL */ if (dcp->cp_keylocation != NULL && !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) { ret = SET_ERROR(EINVAL); goto error; } /* passphrases require pbkdf2 salt and iters */ if (dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_PASSPHRASE) { if (dcp->cp_wkey->wk_salt == 0 || dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) { ret = SET_ERROR(EINVAL); goto error; } } else { if (dcp->cp_wkey->wk_salt != 0 || dcp->cp_wkey->wk_iters != 0) { ret = SET_ERROR(EINVAL); goto error; } } /* make sure the dd's wkey is loaded */ ret = dmu_objset_check_wkey_loaded(dd); if (ret != 0) goto error; dsl_dir_rele(dd, FTAG); return (0); error: if (dd != NULL) dsl_dir_rele(dd, FTAG); return (ret); } /* * This function deals with the intricacies of updating wrapping * key references and encryption roots recursively in the event * of a call to 'zfs change-key' or 'zfs promote'. The 'skip' * parameter should always be set to B_FALSE when called * externally. */ static void spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, uint64_t new_rddobj, dsl_wrapping_key_t *wkey, boolean_t skip, dmu_tx_t *tx) { int ret; zap_cursor_t *zc; zap_attribute_t *za; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dir_t *dd = NULL; dsl_crypto_key_t *dck = NULL; uint64_t curr_rddobj; ASSERT(RW_WRITE_HELD(&dp->dp_spa->spa_keystore.sk_wkeys_lock)); /* hold the dd */ VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); /* ignore special dsl dirs */ if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') { dsl_dir_rele(dd, FTAG); return; } ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); VERIFY(ret == 0 || ret == ENOENT); /* * Stop recursing if this dsl dir didn't inherit from the root * or if this dd is a clone. */ if (ret == ENOENT || (!skip && (curr_rddobj != rddobj || dsl_dir_is_clone(dd)))) { dsl_dir_rele(dd, FTAG); return; } /* * If we don't have a wrapping key just update the dck to reflect the * new encryption root. Otherwise rewrap the entire dck and re-sync it * to disk. If skip is set, we don't do any of this work. */ if (!skip) { if (wkey == NULL) { VERIFY0(zap_update(dp->dp_meta_objset, dd->dd_crypto_obj, DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, &new_rddobj, tx)); } else { VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd, FTAG, &dck)); dsl_wrapping_key_hold(wkey, dck); dsl_wrapping_key_rele(dck->dck_wkey, dck); dck->dck_wkey = wkey; dsl_crypto_key_sync(dck, tx); spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG); } } zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* Recurse into all child dsl dirs. */ for (zap_cursor_init(zc, dp->dp_meta_objset, dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { spa_keystore_change_key_sync_impl(rddobj, za->za_first_integer, new_rddobj, wkey, B_FALSE, tx); } zap_cursor_fini(zc); /* * Recurse into all dsl dirs of clones. We utilize the skip parameter * here so that we don't attempt to process the clones directly. This * is because the clone and its origin share the same dck, which has * already been updated. */ for (zap_cursor_init(zc, dp->dp_meta_objset, dsl_dir_phys(dd)->dd_clones); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { dsl_dataset_t *clone; VERIFY0(dsl_dataset_hold_obj(dp, za->za_first_integer, FTAG, &clone)); spa_keystore_change_key_sync_impl(rddobj, clone->ds_dir->dd_object, new_rddobj, wkey, B_TRUE, tx); dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(zc); kmem_free(za, sizeof (zap_attribute_t)); kmem_free(zc, sizeof (zap_cursor_t)); dsl_dir_rele(dd, FTAG); } static void spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_t *ds; avl_index_t where; dsl_pool_t *dp = dmu_tx_pool(tx); spa_t *spa = dp->dp_spa; spa_keystore_change_key_args_t *skcka = arg; dsl_crypto_params_t *dcp = skcka->skcka_cp; dsl_wrapping_key_t *wkey = NULL, *found_wkey; dsl_wrapping_key_t wkey_search; const char *keylocation = dcp->cp_keylocation; uint64_t rddobj, new_rddobj; /* create and initialize the wrapping key */ VERIFY0(dsl_dataset_hold(dp, skcka->skcka_dsname, FTAG, &ds)); ASSERT(!ds->ds_is_snapshot); if (dcp->cp_cmd == DCP_CMD_NEW_KEY || dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) { /* * We are changing to a new wkey. Set additional properties * which can be sent along with this ioctl. Note that this * command can set keylocation even if it can't normally be * set via 'zfs set' due to a non-local keylocation. */ if (dcp->cp_cmd == DCP_CMD_NEW_KEY) { wkey = dcp->cp_wkey; wkey->wk_ddobj = ds->ds_dir->dd_object; } else { keylocation = "prompt"; } if (keylocation != NULL) { dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, keylocation, tx); } VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj)); new_rddobj = ds->ds_dir->dd_object; } else { /* * We are inheritting the parent's wkey. Unset any local * keylocation and grab a reference to the wkey. */ if (dcp->cp_cmd == DCP_CMD_INHERIT) { VERIFY0(spa_keystore_wkey_hold_dd(spa, ds->ds_dir->dd_parent, FTAG, &wkey)); } dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_NONE, 0, 0, NULL, tx); rddobj = ds->ds_dir->dd_object; VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir->dd_parent, &new_rddobj)); } if (wkey == NULL) { ASSERT(dcp->cp_cmd == DCP_CMD_FORCE_INHERIT || dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY); } rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); /* recurse through all children and rewrap their keys */ spa_keystore_change_key_sync_impl(rddobj, ds->ds_dir->dd_object, new_rddobj, wkey, B_FALSE, tx); /* * All references to the old wkey should be released now (if it * existed). Replace the wrapping key. */ wkey_search.wk_ddobj = ds->ds_dir->dd_object; found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &wkey_search, NULL); if (found_wkey != NULL) { ASSERT0(zfs_refcount_count(&found_wkey->wk_refcnt)); avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey); dsl_wrapping_key_free(found_wkey); } if (dcp->cp_cmd == DCP_CMD_NEW_KEY) { avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); } else if (wkey != NULL) { dsl_wrapping_key_rele(wkey, FTAG); } rw_exit(&spa->spa_keystore.sk_wkeys_lock); dsl_dataset_rele(ds, FTAG); } int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp) { spa_keystore_change_key_args_t skcka; /* initialize the args struct */ skcka.skcka_dsname = dsname; skcka.skcka_cp = dcp; /* * Perform the actual work in syncing context. The blocks modified * here could be calculated but it would require holding the pool * lock and traversing all of the datasets that will have their keys * changed. */ return (dsl_sync_task(dsname, spa_keystore_change_key_check, spa_keystore_change_key_sync, &skcka, 15, ZFS_SPACE_CHECK_RESERVED)); } int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent) { int ret; uint64_t curr_rddobj, parent_rddobj; if (dd->dd_crypto_obj == 0) return (0); ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); if (ret != 0) goto error; /* * if this is not an encryption root, we must make sure we are not * moving dd to a new encryption root */ if (dd->dd_object != curr_rddobj) { ret = dsl_dir_get_encryption_root_ddobj(newparent, &parent_rddobj); if (ret != 0) goto error; if (parent_rddobj != curr_rddobj) { ret = SET_ERROR(EACCES); goto error; } } return (0); error: return (ret); } /* * Check to make sure that a promote from targetdd to origindd will not require * any key rewraps. */ int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin) { int ret; uint64_t rddobj, op_rddobj, tp_rddobj; /* If the dataset is not encrypted we don't need to check anything */ if (origin->dd_crypto_obj == 0) return (0); /* * If we are not changing the first origin snapshot in a chain * the encryption root won't change either. */ if (dsl_dir_is_clone(origin)) return (0); /* * If the origin is the encryption root we will update * the DSL Crypto Key to point to the target instead. */ ret = dsl_dir_get_encryption_root_ddobj(origin, &rddobj); if (ret != 0) return (ret); if (rddobj == origin->dd_object) return (0); /* * The origin is inheriting its encryption root from its parent. * Check that the parent of the target has the same encryption root. */ ret = dsl_dir_get_encryption_root_ddobj(origin->dd_parent, &op_rddobj); if (ret == ENOENT) return (SET_ERROR(EACCES)); else if (ret != 0) return (ret); ret = dsl_dir_get_encryption_root_ddobj(target->dd_parent, &tp_rddobj); if (ret == ENOENT) return (SET_ERROR(EACCES)); else if (ret != 0) return (ret); if (op_rddobj != tp_rddobj) return (SET_ERROR(EACCES)); return (0); } void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, dmu_tx_t *tx) { uint64_t rddobj; dsl_pool_t *dp = target->dd_pool; dsl_dataset_t *targetds; dsl_dataset_t *originds; char *keylocation; if (origin->dd_crypto_obj == 0) return; if (dsl_dir_is_clone(origin)) return; VERIFY0(dsl_dir_get_encryption_root_ddobj(origin, &rddobj)); if (rddobj != origin->dd_object) return; /* * If the target is being promoted to the encryption root update the * DSL Crypto Key and keylocation to reflect that. We also need to * update the DSL Crypto Keys of all children inheritting their * encryption root to point to the new target. Otherwise, the check * function ensured that the encryption root will not change. */ keylocation = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(target)->dd_head_dataset_obj, FTAG, &targetds)); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(origin)->dd_head_dataset_obj, FTAG, &originds)); VERIFY0(dsl_prop_get_dd(origin, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), 1, ZAP_MAXVALUELEN, keylocation, NULL, B_FALSE)); dsl_prop_set_sync_impl(targetds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, keylocation, tx); dsl_prop_set_sync_impl(originds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_NONE, 0, 0, NULL, tx); rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_WRITER); spa_keystore_change_key_sync_impl(rddobj, origin->dd_object, target->dd_object, NULL, B_FALSE, tx); rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock); dsl_dataset_rele(targetds, FTAG); dsl_dataset_rele(originds, FTAG); kmem_free(keylocation, ZAP_MAXVALUELEN); } int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, boolean_t *will_encrypt) { int ret; uint64_t pcrypt, crypt; dsl_crypto_params_t dummy_dcp = { 0 }; if (will_encrypt != NULL) *will_encrypt = B_FALSE; if (dcp == NULL) dcp = &dummy_dcp; if (dcp->cp_cmd != DCP_CMD_NONE) return (SET_ERROR(EINVAL)); if (parentdd != NULL) { ret = dsl_dir_get_crypt(parentdd, &pcrypt); if (ret != 0) return (ret); } else { pcrypt = ZIO_CRYPT_OFF; } crypt = (dcp->cp_crypt == ZIO_CRYPT_INHERIT) ? pcrypt : dcp->cp_crypt; ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); /* check for valid dcp with no encryption (inherited or local) */ if (crypt == ZIO_CRYPT_OFF) { /* Must not specify encryption params */ if (dcp->cp_wkey != NULL || (dcp->cp_keylocation != NULL && strcmp(dcp->cp_keylocation, "none") != 0)) return (SET_ERROR(EINVAL)); return (0); } if (will_encrypt != NULL) *will_encrypt = B_TRUE; /* * We will now definitely be encrypting. Check the feature flag. When * creating the pool the caller will check this for us since we won't * technically have the feature activated yet. */ if (parentdd != NULL && !spa_feature_is_enabled(parentdd->dd_pool->dp_spa, SPA_FEATURE_ENCRYPTION)) { return (SET_ERROR(EOPNOTSUPP)); } /* Check for errata #4 (encryption enabled, bookmark_v2 disabled) */ if (parentdd != NULL && !spa_feature_is_enabled(parentdd->dd_pool->dp_spa, SPA_FEATURE_BOOKMARK_V2)) { return (SET_ERROR(EOPNOTSUPP)); } /* handle inheritance */ if (dcp->cp_wkey == NULL) { ASSERT3P(parentdd, !=, NULL); /* key must be fully unspecified */ if (dcp->cp_keylocation != NULL) return (SET_ERROR(EINVAL)); /* parent must have a key to inherit */ if (pcrypt == ZIO_CRYPT_OFF) return (SET_ERROR(EINVAL)); /* check for parent key */ ret = dmu_objset_check_wkey_loaded(parentdd); if (ret != 0) return (ret); return (0); } /* At this point we should have a fully specified key. Check location */ if (dcp->cp_keylocation == NULL || !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) return (SET_ERROR(EINVAL)); /* Must have fully specified keyformat */ switch (dcp->cp_wkey->wk_keyformat) { case ZFS_KEYFORMAT_HEX: case ZFS_KEYFORMAT_RAW: /* requires no pbkdf2 iters and salt */ if (dcp->cp_wkey->wk_salt != 0 || dcp->cp_wkey->wk_iters != 0) return (SET_ERROR(EINVAL)); break; case ZFS_KEYFORMAT_PASSPHRASE: /* requires pbkdf2 iters and salt */ if (dcp->cp_wkey->wk_salt == 0 || dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) return (SET_ERROR(EINVAL)); break; case ZFS_KEYFORMAT_NONE: default: /* keyformat must be specified and valid */ return (SET_ERROR(EINVAL)); } return (0); } void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, dsl_dataset_t *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; uint64_t crypt; dsl_wrapping_key_t *wkey; /* clones always use their origin's wrapping key */ if (dsl_dir_is_clone(dd)) { ASSERT3P(dcp, ==, NULL); /* * If this is an encrypted clone we just need to clone the * dck into dd. Zapify the dd so we can do that. */ if (origin->ds_dir->dd_crypto_obj != 0) { dmu_buf_will_dirty(dd->dd_dbuf, tx); dsl_dir_zapify(dd, tx); dd->dd_crypto_obj = dsl_crypto_key_clone_sync(origin->ds_dir, tx); VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object, DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj, tx)); } return; } /* * A NULL dcp at this point indicates this is the origin dataset * which does not have an objset to encrypt. Raw receives will handle * encryption separately later. In both cases we can simply return. */ if (dcp == NULL || dcp->cp_cmd == DCP_CMD_RAW_RECV) return; crypt = dcp->cp_crypt; wkey = dcp->cp_wkey; /* figure out the effective crypt */ if (crypt == ZIO_CRYPT_INHERIT && dd->dd_parent != NULL) VERIFY0(dsl_dir_get_crypt(dd->dd_parent, &crypt)); /* if we aren't doing encryption just return */ if (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT) return; /* zapify the dd so that we can add the crypto key obj to it */ dmu_buf_will_dirty(dd->dd_dbuf, tx); dsl_dir_zapify(dd, tx); /* use the new key if given or inherit from the parent */ if (wkey == NULL) { VERIFY0(spa_keystore_wkey_hold_dd(dp->dp_spa, dd->dd_parent, FTAG, &wkey)); } else { wkey->wk_ddobj = dd->dd_object; } ASSERT3P(wkey, !=, NULL); /* Create or clone the DSL crypto key and activate the feature */ dd->dd_crypto_obj = dsl_crypto_key_create_sync(crypt, wkey, tx); VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object, DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj, tx)); dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, (void *)B_TRUE, tx); /* * If we inherited the wrapping key we release our reference now. * Otherwise, this is a new key and we need to load it into the * keystore. */ if (dcp->cp_wkey == NULL) { dsl_wrapping_key_rele(wkey, FTAG); } else { VERIFY0(spa_keystore_load_wkey_impl(dp->dp_spa, wkey)); } } typedef struct dsl_crypto_recv_key_arg { uint64_t dcrka_dsobj; uint64_t dcrka_fromobj; dmu_objset_type_t dcrka_ostype; nvlist_t *dcrka_nvl; boolean_t dcrka_do_key; } dsl_crypto_recv_key_arg_t; static int dsl_crypto_recv_raw_objset_check(dsl_dataset_t *ds, dsl_dataset_t *fromds, dmu_objset_type_t ostype, nvlist_t *nvl, dmu_tx_t *tx) { int ret; objset_t *os; dnode_t *mdn; uint8_t *buf = NULL; uint_t len; uint64_t intval, nlevels, blksz, ibs; uint64_t nblkptr, maxblkid; if (ostype != DMU_OST_ZFS && ostype != DMU_OST_ZVOL) return (SET_ERROR(EINVAL)); /* raw receives also need info about the structure of the metadnode */ ret = nvlist_lookup_uint64(nvl, "mdn_compress", &intval); if (ret != 0 || intval >= ZIO_COMPRESS_LEGACY_FUNCTIONS) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint64(nvl, "mdn_checksum", &intval); if (ret != 0 || intval >= ZIO_CHECKSUM_LEGACY_FUNCTIONS) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint64(nvl, "mdn_nlevels", &nlevels); if (ret != 0 || nlevels > DN_MAX_LEVELS) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint64(nvl, "mdn_blksz", &blksz); if (ret != 0 || blksz < SPA_MINBLOCKSIZE) return (SET_ERROR(EINVAL)); else if (blksz > spa_maxblocksize(tx->tx_pool->dp_spa)) return (SET_ERROR(ENOTSUP)); ret = nvlist_lookup_uint64(nvl, "mdn_indblkshift", &ibs); if (ret != 0 || ibs < DN_MIN_INDBLKSHIFT || ibs > DN_MAX_INDBLKSHIFT) return (SET_ERROR(ENOTSUP)); ret = nvlist_lookup_uint64(nvl, "mdn_nblkptr", &nblkptr); if (ret != 0 || nblkptr != DN_MAX_NBLKPTR) return (SET_ERROR(ENOTSUP)); ret = nvlist_lookup_uint64(nvl, "mdn_maxblkid", &maxblkid); if (ret != 0) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len); if (ret != 0 || len != ZIO_OBJSET_MAC_LEN) return (SET_ERROR(EINVAL)); ret = dmu_objset_from_ds(ds, &os); if (ret != 0) return (ret); mdn = DMU_META_DNODE(os); /* * If we already created the objset, make sure its unchangeable * properties match the ones received in the nvlist. */ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); if (!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) && (mdn->dn_nlevels != nlevels || mdn->dn_datablksz != blksz || mdn->dn_indblkshift != ibs || mdn->dn_nblkptr != nblkptr)) { rrw_exit(&ds->ds_bp_rwlock, FTAG); return (SET_ERROR(EINVAL)); } rrw_exit(&ds->ds_bp_rwlock, FTAG); /* * Check that the ivset guid of the fromds matches the one from the * send stream. Older versions of the encryption code did not have * an ivset guid on the from dataset and did not send one in the * stream. For these streams we provide the * zfs_disable_ivset_guid_check tunable to allow these datasets to * be received with a generated ivset guid. */ if (fromds != NULL && !zfs_disable_ivset_guid_check) { uint64_t from_ivset_guid = 0; intval = 0; (void) nvlist_lookup_uint64(nvl, "from_ivset_guid", &intval); (void) zap_lookup(tx->tx_pool->dp_meta_objset, fromds->ds_object, DS_FIELD_IVSET_GUID, sizeof (from_ivset_guid), 1, &from_ivset_guid); if (intval == 0 || from_ivset_guid == 0) return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISSING)); if (intval != from_ivset_guid) return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISMATCH)); } return (0); } static void dsl_crypto_recv_raw_objset_sync(dsl_dataset_t *ds, dmu_objset_type_t ostype, nvlist_t *nvl, dmu_tx_t *tx) { dsl_pool_t *dp = tx->tx_pool; objset_t *os; dnode_t *mdn; zio_t *zio; uint8_t *portable_mac; uint_t len; uint64_t compress, checksum, nlevels, blksz, ibs, maxblkid; boolean_t newds = B_FALSE; VERIFY0(dmu_objset_from_ds(ds, &os)); mdn = DMU_META_DNODE(os); /* * Fetch the values we need from the nvlist. "to_ivset_guid" must * be set on the snapshot, which doesn't exist yet. The receive * code will take care of this for us later. */ compress = fnvlist_lookup_uint64(nvl, "mdn_compress"); checksum = fnvlist_lookup_uint64(nvl, "mdn_checksum"); nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels"); blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz"); ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift"); maxblkid = fnvlist_lookup_uint64(nvl, "mdn_maxblkid"); VERIFY0(nvlist_lookup_uint8_array(nvl, "portable_mac", &portable_mac, &len)); /* if we haven't created an objset for the ds yet, do that now */ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); if (BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) { (void) dmu_objset_create_impl_dnstats(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), ostype, nlevels, blksz, ibs, tx); newds = B_TRUE; } rrw_exit(&ds->ds_bp_rwlock, FTAG); /* * Set the portable MAC. The local MAC will always be zero since the * incoming data will all be portable and user accounting will be * deferred until the next mount. Afterwards, flag the os to be * written out raw next time. */ arc_release(os->os_phys_buf, &os->os_phys_buf); memcpy(os->os_phys->os_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN); memset(os->os_phys->os_local_mac, 0, ZIO_OBJSET_MAC_LEN); os->os_flags &= ~OBJSET_FLAG_USERACCOUNTING_COMPLETE; os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; /* set metadnode compression and checksum */ mdn->dn_compress = compress; mdn->dn_checksum = checksum; rw_enter(&mdn->dn_struct_rwlock, RW_WRITER); dnode_new_blkid(mdn, maxblkid, tx, B_FALSE, B_TRUE); rw_exit(&mdn->dn_struct_rwlock); /* * We can't normally dirty the dataset in syncing context unless * we are creating a new dataset. In this case, we perform a * pseudo txg sync here instead. */ if (newds) { dsl_dataset_dirty(ds, tx); } else { zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dsl_dataset_sync(ds, zio, tx); VERIFY0(zio_wait(zio)); /* dsl_dataset_sync_done will drop this reference. */ dmu_buf_add_ref(ds->ds_dbuf, ds); dsl_dataset_sync_done(ds, tx); } } int dsl_crypto_recv_raw_key_check(dsl_dataset_t *ds, nvlist_t *nvl, dmu_tx_t *tx) { int ret; objset_t *mos = tx->tx_pool->dp_meta_objset; uint8_t *buf = NULL; uint_t len; uint64_t intval, key_guid, version; boolean_t is_passphrase = B_FALSE; ASSERT(dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT); /* * Read and check all the encryption values from the nvlist. We need * all of the fields of a DSL Crypto Key, as well as a fully specified * wrapping key. */ ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, &intval); if (ret != 0 || intval >= ZIO_CRYPT_FUNCTIONS || intval <= ZIO_CRYPT_OFF) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID, &intval); if (ret != 0) return (SET_ERROR(EINVAL)); /* * If this is an incremental receive make sure the given key guid * matches the one we already have. */ if (ds->ds_dir->dd_crypto_obj != 0) { ret = zap_lookup(mos, ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_GUID, 8, 1, &key_guid); if (ret != 0) return (ret); if (intval != key_guid) return (SET_ERROR(EACCES)); } ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, &buf, &len); if (ret != 0 || len != MASTER_KEY_MAX_LEN) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, &buf, &len); if (ret != 0 || len != SHA512_HMAC_KEYLEN) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &buf, &len); if (ret != 0 || len != WRAPPING_IV_LEN) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &buf, &len); if (ret != 0 || len != WRAPPING_MAC_LEN) return (SET_ERROR(EINVAL)); /* * We don't support receiving old on-disk formats. The version 0 * implementation protected several fields in an objset that were * not always portable during a raw receive. As a result, we call * the old version an on-disk errata #3. */ ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_VERSION, &version); if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) return (SET_ERROR(ENOTSUP)); ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &intval); if (ret != 0 || intval >= ZFS_KEYFORMAT_FORMATS || intval == ZFS_KEYFORMAT_NONE) return (SET_ERROR(EINVAL)); is_passphrase = (intval == ZFS_KEYFORMAT_PASSPHRASE); /* * for raw receives we allow any number of pbkdf2iters since there * won't be a chance for the user to change it. */ ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &intval); if (ret != 0 || (is_passphrase == (intval == 0))) return (SET_ERROR(EINVAL)); ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), &intval); if (ret != 0 || (is_passphrase == (intval == 0))) return (SET_ERROR(EINVAL)); return (0); } void dsl_crypto_recv_raw_key_sync(dsl_dataset_t *ds, nvlist_t *nvl, dmu_tx_t *tx) { dsl_pool_t *dp = tx->tx_pool; objset_t *mos = dp->dp_meta_objset; dsl_dir_t *dd = ds->ds_dir; uint_t len; uint64_t rddobj, one = 1; uint8_t *keydata, *hmac_keydata, *iv, *mac; uint64_t crypt, key_guid, keyformat, iters, salt; uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; const char *keylocation = "prompt"; /* lookup the values we need to create the DSL Crypto Key */ crypt = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE); key_guid = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID); keyformat = fnvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT)); iters = fnvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS)); salt = fnvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT)); VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, &keydata, &len)); VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, &hmac_keydata, &len)); VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &iv, &len)); VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &mac, &len)); /* if this is a new dataset setup the DSL Crypto Key. */ if (dd->dd_crypto_obj == 0) { /* zapify the dsl dir so we can add the key object to it */ dmu_buf_will_dirty(dd->dd_dbuf, tx); dsl_dir_zapify(dd, tx); /* create the DSL Crypto Key on disk and activate the feature */ dd->dd_crypto_obj = zap_create(mos, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dd->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dd->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION, sizeof (uint64_t), 1, &version, tx)); dsl_dataset_activate_feature(ds->ds_object, SPA_FEATURE_ENCRYPTION, (void *)B_TRUE, tx); ds->ds_feature[SPA_FEATURE_ENCRYPTION] = (void *)B_TRUE; /* save the dd_crypto_obj on disk */ VERIFY0(zap_add(mos, dd->dd_object, DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj, tx)); /* * Set the keylocation to prompt by default. If keylocation * has been provided via the properties, this will be overridden * later. */ dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, keylocation, tx); rddobj = dd->dd_object; } else { VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &rddobj)); } /* sync the key data to the ZAP object on disk */ dsl_crypto_key_sync_impl(mos, dd->dd_crypto_obj, crypt, rddobj, key_guid, iv, mac, keydata, hmac_keydata, keyformat, salt, iters, tx); } static int dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) { int ret; dsl_crypto_recv_key_arg_t *dcrka = arg; dsl_dataset_t *ds = NULL, *fromds = NULL; ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds); if (ret != 0) goto out; if (dcrka->dcrka_fromobj != 0) { ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_fromobj, FTAG, &fromds); if (ret != 0) goto out; } ret = dsl_crypto_recv_raw_objset_check(ds, fromds, dcrka->dcrka_ostype, dcrka->dcrka_nvl, tx); if (ret != 0) goto out; /* * We run this check even if we won't be doing this part of * the receive now so that we don't make the user wait until * the receive finishes to fail. */ ret = dsl_crypto_recv_raw_key_check(ds, dcrka->dcrka_nvl, tx); if (ret != 0) goto out; out: if (ds != NULL) dsl_dataset_rele(ds, FTAG); if (fromds != NULL) dsl_dataset_rele(fromds, FTAG); return (ret); } static void dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) { dsl_crypto_recv_key_arg_t *dcrka = arg; dsl_dataset_t *ds; VERIFY0(dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds)); dsl_crypto_recv_raw_objset_sync(ds, dcrka->dcrka_ostype, dcrka->dcrka_nvl, tx); if (dcrka->dcrka_do_key) dsl_crypto_recv_raw_key_sync(ds, dcrka->dcrka_nvl, tx); dsl_dataset_rele(ds, FTAG); } /* * This function is used to sync an nvlist representing a DSL Crypto Key and * the associated encryption parameters. The key will be written exactly as is * without wrapping it. */ int dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj, dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key) { dsl_crypto_recv_key_arg_t dcrka; dcrka.dcrka_dsobj = dsobj; dcrka.dcrka_fromobj = fromobj; dcrka.dcrka_ostype = ostype; dcrka.dcrka_nvl = nvl; dcrka.dcrka_do_key = do_key; return (dsl_sync_task(poolname, dsl_crypto_recv_key_check, dsl_crypto_recv_key_sync, &dcrka, 1, ZFS_SPACE_CHECK_NORMAL)); } int dsl_crypto_populate_key_nvlist(objset_t *os, uint64_t from_ivset_guid, nvlist_t **nvl_out) { int ret; dsl_dataset_t *ds = os->os_dsl_dataset; dnode_t *mdn; uint64_t rddobj; nvlist_t *nvl = NULL; uint64_t dckobj = ds->ds_dir->dd_crypto_obj; dsl_dir_t *rdd = NULL; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t crypt = 0, key_guid = 0, format = 0; uint64_t iters = 0, salt = 0, version = 0; uint64_t to_ivset_guid = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; uint8_t mac[WRAPPING_MAC_LEN]; ASSERT(dckobj != 0); mdn = DMU_META_DNODE(os); nvl = fnvlist_alloc(); /* lookup values from the DSL Crypto Key */ ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, &crypt); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &key_guid); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, MASTER_KEY_MAX_LEN, raw_keydata); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, SHA512_HMAC_KEYLEN, raw_hmac_keydata); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, iv); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, mac); if (ret != 0) goto error; /* see zfs_disable_ivset_guid_check tunable for errata info */ ret = zap_lookup(mos, ds->ds_object, DS_FIELD_IVSET_GUID, 8, 1, &to_ivset_guid); if (ret != 0) ASSERT3U(dp->dp_spa->spa_errata, !=, 0); /* * We don't support raw sends of legacy on-disk formats. See the * comment in dsl_crypto_recv_key_check() for details. */ ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; ret = SET_ERROR(ENOTSUP); goto error; } /* * Lookup wrapping key properties. An early version of the code did * not correctly add these values to the wrapping key or the DSL * Crypto Key on disk for non encryption roots, so to be safe we * always take the slightly circuitous route of looking it up from * the encryption root's key. */ ret = dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj); if (ret != 0) goto error; dsl_pool_config_enter(dp, FTAG); ret = dsl_dir_hold_obj(dp, rddobj, NULL, FTAG, &rdd); if (ret != 0) goto error_unlock; ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &format); if (ret != 0) goto error_unlock; if (format == ZFS_KEYFORMAT_PASSPHRASE) { ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters); if (ret != 0) goto error_unlock; ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt); if (ret != 0) goto error_unlock; } dsl_dir_rele(rdd, FTAG); dsl_pool_config_exit(dp, FTAG); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, key_guid); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_VERSION, version); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, raw_keydata, MASTER_KEY_MAX_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, raw_hmac_keydata, SHA512_HMAC_KEYLEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_IV, iv, WRAPPING_IV_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, mac, WRAPPING_MAC_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, "portable_mac", os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN)); fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), format); fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters); fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt); fnvlist_add_uint64(nvl, "mdn_checksum", mdn->dn_checksum); fnvlist_add_uint64(nvl, "mdn_compress", mdn->dn_compress); fnvlist_add_uint64(nvl, "mdn_nlevels", mdn->dn_nlevels); fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz); fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift); fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr); fnvlist_add_uint64(nvl, "mdn_maxblkid", mdn->dn_maxblkid); fnvlist_add_uint64(nvl, "to_ivset_guid", to_ivset_guid); fnvlist_add_uint64(nvl, "from_ivset_guid", from_ivset_guid); *nvl_out = nvl; return (0); error_unlock: dsl_pool_config_exit(dp, FTAG); error: if (rdd != NULL) dsl_dir_rele(rdd, FTAG); nvlist_free(nvl); *nvl_out = NULL; return (ret); } uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) { dsl_crypto_key_t dck; uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; uint64_t one = 1ULL; ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(crypt, >, ZIO_CRYPT_OFF); /* create the DSL Crypto Key ZAP object */ dck.dck_obj = zap_create(tx->tx_pool->dp_meta_objset, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); /* fill in the key (on the stack) and sync it to disk */ dck.dck_wkey = wkey; VERIFY0(zio_crypt_key_init(crypt, &dck.dck_key)); dsl_crypto_key_sync(&dck, tx); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, DSL_CRYPTO_KEY_VERSION, sizeof (uint64_t), 1, &version, tx)); zio_crypt_key_destroy(&dck.dck_key); memset(&dck.dck_key, 0, sizeof (zio_crypt_key_t)); return (dck.dck_obj); } uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx) { objset_t *mos = tx->tx_pool->dp_meta_objset; ASSERT(dmu_tx_is_syncing(tx)); VERIFY0(zap_increment(mos, origindd->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT, 1, tx)); return (origindd->dd_crypto_obj); } void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx) { objset_t *mos = tx->tx_pool->dp_meta_objset; uint64_t refcnt; /* Decrement the refcount, destroy if this is the last reference */ VERIFY0(zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &refcnt)); if (refcnt != 1) { VERIFY0(zap_increment(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, -1, tx)); } else { VERIFY0(zap_destroy(mos, dckobj, tx)); } } void dsl_dataset_crypt_stats(dsl_dataset_t *ds, nvlist_t *nv) { uint64_t intval; dsl_dir_t *dd = ds->ds_dir; dsl_dir_t *enc_root; char buf[ZFS_MAX_DATASET_NAME_LEN]; if (dd->dd_crypto_obj == 0) return; intval = dsl_dataset_get_keystatus(dd); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYSTATUS, intval); if (dsl_dir_get_crypt(dd, &intval) == 0) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_ENCRYPTION, intval); if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, DSL_CRYPTO_KEY_GUID, 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEY_GUID, intval); } if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYFORMAT, intval); } if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_SALT, intval); } if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_ITERS, intval); } if (zap_lookup(dd->dd_pool->dp_meta_objset, ds->ds_object, DS_FIELD_IVSET_GUID, 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_IVSET_GUID, intval); } if (dsl_dir_get_encryption_root_ddobj(dd, &intval) == 0) { if (dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, &enc_root) == 0) { dsl_dir_name(enc_root, buf); dsl_dir_rele(enc_root, FTAG); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, buf); } } } int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt) { int ret; dsl_crypto_key_t *dck = NULL; /* look up the key from the spa's keystore */ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); if (ret != 0) goto error; ret = zio_crypt_key_get_salt(&dck->dck_key, salt); if (ret != 0) goto error; spa_keystore_dsl_key_rele(spa, dck, FTAG); return (0); error: if (dck != NULL) spa_keystore_dsl_key_rele(spa, dck, FTAG); return (ret); } /* * Objset blocks are a special case for MAC generation. These blocks have 2 * 256-bit MACs which are embedded within the block itself, rather than a * single 128 bit MAC. As a result, this function handles encoding and decoding * the MACs on its own, unlike other functions in this file. */ int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, uint_t datalen, boolean_t byteswap) { int ret; dsl_crypto_key_t *dck = NULL; void *buf = abd_borrow_buf_copy(abd, datalen); objset_phys_t *osp = buf; uint8_t portable_mac[ZIO_OBJSET_MAC_LEN]; uint8_t local_mac[ZIO_OBJSET_MAC_LEN]; /* look up the key from the spa's keystore */ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); if (ret != 0) goto error; /* calculate both HMACs */ ret = zio_crypt_do_objset_hmacs(&dck->dck_key, buf, datalen, byteswap, portable_mac, local_mac); if (ret != 0) goto error; spa_keystore_dsl_key_rele(spa, dck, FTAG); /* if we are generating encode the HMACs in the objset_phys_t */ if (generate) { memcpy(osp->os_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN); memcpy(osp->os_local_mac, local_mac, ZIO_OBJSET_MAC_LEN); abd_return_buf_copy(abd, buf, datalen); return (0); } if (memcmp(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN) != 0 || memcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) { abd_return_buf(abd, buf, datalen); return (SET_ERROR(ECKSUM)); } abd_return_buf(abd, buf, datalen); return (0); error: if (dck != NULL) spa_keystore_dsl_key_rele(spa, dck, FTAG); abd_return_buf(abd, buf, datalen); return (ret); } int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, uint_t datalen, uint8_t *mac) { int ret; dsl_crypto_key_t *dck = NULL; uint8_t *buf = abd_borrow_buf_copy(abd, datalen); uint8_t digestbuf[ZIO_DATA_MAC_LEN]; /* look up the key from the spa's keystore */ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); if (ret != 0) goto error; /* perform the hmac */ ret = zio_crypt_do_hmac(&dck->dck_key, buf, datalen, digestbuf, ZIO_DATA_MAC_LEN); if (ret != 0) goto error; abd_return_buf(abd, buf, datalen); spa_keystore_dsl_key_rele(spa, dck, FTAG); /* * Truncate and fill in mac buffer if we were asked to generate a MAC. * Otherwise verify that the MAC matched what we expected. */ if (generate) { memcpy(mac, digestbuf, ZIO_DATA_MAC_LEN); return (0); } if (memcmp(digestbuf, mac, ZIO_DATA_MAC_LEN) != 0) return (SET_ERROR(ECKSUM)); return (0); error: if (dck != NULL) spa_keystore_dsl_key_rele(spa, dck, FTAG); abd_return_buf(abd, buf, datalen); return (ret); } /* * This function serves as a multiplexer for encryption and decryption of * all blocks (except the L2ARC). For encryption, it will populate the IV, * salt, MAC, and cabd (the ciphertext). On decryption it will simply use * these fields to populate pabd (the plaintext). */ int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb, dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) { int ret; dsl_crypto_key_t *dck = NULL; uint8_t *plainbuf = NULL, *cipherbuf = NULL; ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION)); /* look up the key from the spa's keystore */ ret = spa_keystore_lookup_key(spa, zb->zb_objset, FTAG, &dck); if (ret != 0) { ret = SET_ERROR(EACCES); return (ret); } if (encrypt) { plainbuf = abd_borrow_buf_copy(pabd, datalen); cipherbuf = abd_borrow_buf(cabd, datalen); } else { plainbuf = abd_borrow_buf(pabd, datalen); cipherbuf = abd_borrow_buf_copy(cabd, datalen); } /* * Both encryption and decryption functions need a salt for key * generation and an IV. When encrypting a non-dedup block, we * generate the salt and IV randomly to be stored by the caller. Dedup * blocks perform a (more expensive) HMAC of the plaintext to obtain * the salt and the IV. ZIL blocks have their salt and IV generated * at allocation time in zio_alloc_zil(). On decryption, we simply use * the provided values. */ if (encrypt && ot != DMU_OT_INTENT_LOG && !dedup) { ret = zio_crypt_key_get_salt(&dck->dck_key, salt); if (ret != 0) goto error; ret = zio_crypt_generate_iv(iv); if (ret != 0) goto error; } else if (encrypt && dedup) { ret = zio_crypt_generate_iv_salt_dedup(&dck->dck_key, plainbuf, datalen, iv, salt); if (ret != 0) goto error; } /* call lower level function to perform encryption / decryption */ ret = zio_do_crypt_data(encrypt, &dck->dck_key, ot, bswap, salt, iv, mac, datalen, plainbuf, cipherbuf, no_crypt); /* * Handle injected decryption faults. Unfortunately, we cannot inject * faults for dnode blocks because we might trigger the panic in * dbuf_prepare_encrypted_dnode_leaf(), which exists because syncing * context is not prepared to handle malicious decryption failures. */ if (zio_injection_enabled && !encrypt && ot != DMU_OT_DNODE && ret == 0) ret = zio_handle_decrypt_injection(spa, zb, ot, ECKSUM); if (ret != 0) goto error; if (encrypt) { abd_return_buf(pabd, plainbuf, datalen); abd_return_buf_copy(cabd, cipherbuf, datalen); } else { abd_return_buf_copy(pabd, plainbuf, datalen); abd_return_buf(cabd, cipherbuf, datalen); } spa_keystore_dsl_key_rele(spa, dck, FTAG); return (0); error: if (encrypt) { /* zero out any state we might have changed while encrypting */ memset(salt, 0, ZIO_DATA_SALT_LEN); memset(iv, 0, ZIO_DATA_IV_LEN); memset(mac, 0, ZIO_DATA_MAC_LEN); abd_return_buf(pabd, plainbuf, datalen); abd_return_buf_copy(cabd, cipherbuf, datalen); } else { abd_return_buf_copy(pabd, plainbuf, datalen); abd_return_buf(cabd, cipherbuf, datalen); } spa_keystore_dsl_key_rele(spa, dck, FTAG); return (ret); } ZFS_MODULE_PARAM(zfs, zfs_, disable_ivset_guid_check, INT, ZMOD_RW, "Set to allow raw receives without IVset guids"); diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c index 12fcecf0dd3f..21fef4c621b5 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c @@ -1,5024 +1,5030 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2014 RackTop Systems. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2019, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2020 The FreeBSD Foundation [1] * * [1] Portions of this software were developed by Allan Jude * under sponsorship from the FreeBSD Foundation. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The SPA supports block sizes up to 16MB. However, very large blocks * can have an impact on i/o latency (e.g. tying up a spinning disk for * ~300ms), and also potentially on the memory allocator. Therefore, * we did not allow the recordsize to be set larger than zfs_max_recordsize * (former default: 1MB). Larger blocks could be created by changing this * tunable, and pools with larger blocks could always be imported and used, * regardless of this setting. * * We do, however, still limit it by default to 1M on x86_32, because Linux's * 3/1 memory split doesn't leave much room for 16M chunks. */ #ifdef _ILP32 int zfs_max_recordsize = 1 * 1024 * 1024; #else int zfs_max_recordsize = 16 * 1024 * 1024; #endif static int zfs_allow_redacted_dataset_mount = 0; +int zfs_snapshot_history_enabled = 1; + #define SWITCH64(x, y) \ { \ uint64_t __tmp = (x); \ (x) = (y); \ (y) = __tmp; \ } #define DS_REF_MAX (1ULL << 62) static void dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx); static void dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds, dmu_tx_t *tx); static void unload_zfeature(dsl_dataset_t *ds, spa_feature_t f); extern int spa_asize_inflation; static zil_header_t zero_zil; /* * Figure out how much of this delta should be propagated to the dsl_dir * layer. If there's a refreservation, that space has already been * partially accounted for in our ancestors. */ static int64_t parent_delta(dsl_dataset_t *ds, int64_t delta) { dsl_dataset_phys_t *ds_phys; uint64_t old_bytes, new_bytes; if (ds->ds_reserved == 0) return (delta); ds_phys = dsl_dataset_phys(ds); old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved); new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved); ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); return (new_bytes - old_bytes); } void dsl_dataset_feature_set_activation(const blkptr_t *bp, dsl_dataset_t *ds) { spa_feature_t f; if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) { ds->ds_feature_activation[SPA_FEATURE_LARGE_BLOCKS] = (void *)B_TRUE; } f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp)); if (f != SPA_FEATURE_NONE) { ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN); ds->ds_feature_activation[f] = (void *)B_TRUE; } f = zio_compress_to_feature(BP_GET_COMPRESS(bp)); if (f != SPA_FEATURE_NONE) { ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN); ds->ds_feature_activation[f] = (void *)B_TRUE; } } void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) { spa_t *spa = dmu_tx_pool(tx)->dp_spa; int used = bp_get_dsize_sync(spa, bp); int compressed = BP_GET_PSIZE(bp); int uncompressed = BP_GET_UCSIZE(bp); int64_t delta; dprintf_bp(bp, "ds=%p", ds); ASSERT(dmu_tx_is_syncing(tx)); /* It could have been compressed away to nothing */ if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) return; ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); if (ds == NULL) { dsl_pool_mos_diduse_space(tx->tx_pool, used, compressed, uncompressed); return; } ASSERT3U(bp->blk_birth, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_lock); delta = parent_delta(ds, used); dsl_dataset_phys(ds)->ds_referenced_bytes += used; dsl_dataset_phys(ds)->ds_compressed_bytes += compressed; dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed; dsl_dataset_phys(ds)->ds_unique_bytes += used; dsl_dataset_feature_set_activation(bp, ds); /* * Track block for livelist, but ignore embedded blocks because * they do not need to be freed. */ if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && bp->blk_birth > ds->ds_dir->dd_origin_txg && !(BP_IS_EMBEDDED(bp))) { ASSERT(dsl_dir_is_clone(ds->ds_dir)); ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_LIVELIST)); bplist_append(&ds->ds_dir->dd_pending_allocs, bp); } mutex_exit(&ds->ds_lock); dsl_dir_diduse_transfer_space(ds->ds_dir, delta, compressed, uncompressed, used, DD_USED_REFRSRV, DD_USED_HEAD, tx); } /* * Called when the specified segment has been remapped, and is thus no * longer referenced in the head dataset. The vdev must be indirect. * * If the segment is referenced by a snapshot, put it on the remap deadlist. * Otherwise, add this segment to the obsolete spacemap. */ void dsl_dataset_block_remapped(dsl_dataset_t *ds, uint64_t vdev, uint64_t offset, uint64_t size, uint64_t birth, dmu_tx_t *tx) { spa_t *spa = ds->ds_dir->dd_pool->dp_spa; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(birth <= tx->tx_txg); ASSERT(!ds->ds_is_snapshot); if (birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { spa_vdev_indirect_mark_obsolete(spa, vdev, offset, size, tx); } else { blkptr_t fakebp; dva_t *dva = &fakebp.blk_dva[0]; ASSERT(ds != NULL); mutex_enter(&ds->ds_remap_deadlist_lock); if (!dsl_dataset_remap_deadlist_exists(ds)) { dsl_dataset_create_remap_deadlist(ds, tx); } mutex_exit(&ds->ds_remap_deadlist_lock); BP_ZERO(&fakebp); fakebp.blk_birth = birth; DVA_SET_VDEV(dva, vdev); DVA_SET_OFFSET(dva, offset); DVA_SET_ASIZE(dva, size); dsl_deadlist_insert(&ds->ds_remap_deadlist, &fakebp, B_FALSE, tx); } } int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, boolean_t async) { spa_t *spa = dmu_tx_pool(tx)->dp_spa; int used = bp_get_dsize_sync(spa, bp); int compressed = BP_GET_PSIZE(bp); int uncompressed = BP_GET_UCSIZE(bp); if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) return (0); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(bp->blk_birth <= tx->tx_txg); if (ds == NULL) { dsl_free(tx->tx_pool, tx->tx_txg, bp); dsl_pool_mos_diduse_space(tx->tx_pool, -used, -compressed, -uncompressed); return (used); } ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); ASSERT(!ds->ds_is_snapshot); dmu_buf_will_dirty(ds->ds_dbuf, tx); /* * Track block for livelist, but ignore embedded blocks because * they do not need to be freed. */ if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && bp->blk_birth > ds->ds_dir->dd_origin_txg && !(BP_IS_EMBEDDED(bp))) { ASSERT(dsl_dir_is_clone(ds->ds_dir)); ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_LIVELIST)); bplist_append(&ds->ds_dir->dd_pending_frees, bp); } if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { int64_t delta; dprintf_bp(bp, "freeing ds=%llu", (u_longlong_t)ds->ds_object); dsl_free(tx->tx_pool, tx->tx_txg, bp); mutex_enter(&ds->ds_lock); ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used || !DS_UNIQUE_IS_ACCURATE(ds)); delta = parent_delta(ds, -used); dsl_dataset_phys(ds)->ds_unique_bytes -= used; mutex_exit(&ds->ds_lock); dsl_dir_diduse_transfer_space(ds->ds_dir, delta, -compressed, -uncompressed, -used, DD_USED_REFRSRV, DD_USED_HEAD, tx); } else { dprintf_bp(bp, "putting on dead list: %s", ""); if (async) { /* * We are here as part of zio's write done callback, * which means we're a zio interrupt thread. We can't * call dsl_deadlist_insert() now because it may block * waiting for I/O. Instead, put bp on the deferred * queue and let dsl_pool_sync() finish the job. */ bplist_append(&ds->ds_pending_deadlist, bp); } else { dsl_deadlist_insert(&ds->ds_deadlist, bp, B_FALSE, tx); } ASSERT3U(ds->ds_prev->ds_object, ==, dsl_dataset_phys(ds)->ds_prev_snap_obj); ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0); /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object && bp->blk_birth > dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); mutex_enter(&ds->ds_prev->ds_lock); dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used; mutex_exit(&ds->ds_prev->ds_lock); } if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { dsl_dir_transfer_space(ds->ds_dir, used, DD_USED_HEAD, DD_USED_SNAP, tx); } } dsl_bookmark_block_killed(ds, bp, tx); mutex_enter(&ds->ds_lock); ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used); dsl_dataset_phys(ds)->ds_referenced_bytes -= used; ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed); dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed; ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed); dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed; mutex_exit(&ds->ds_lock); return (used); } struct feature_type_uint64_array_arg { uint64_t length; uint64_t *array; }; static void unload_zfeature(dsl_dataset_t *ds, spa_feature_t f) { switch (spa_feature_table[f].fi_type) { case ZFEATURE_TYPE_BOOLEAN: break; case ZFEATURE_TYPE_UINT64_ARRAY: { struct feature_type_uint64_array_arg *ftuaa = ds->ds_feature[f]; kmem_free(ftuaa->array, ftuaa->length * sizeof (uint64_t)); kmem_free(ftuaa, sizeof (*ftuaa)); break; } default: panic("Invalid zfeature type %d", spa_feature_table[f].fi_type); } } static int load_zfeature(objset_t *mos, dsl_dataset_t *ds, spa_feature_t f) { int err = 0; switch (spa_feature_table[f].fi_type) { case ZFEATURE_TYPE_BOOLEAN: err = zap_contains(mos, ds->ds_object, spa_feature_table[f].fi_guid); if (err == 0) { ds->ds_feature[f] = (void *)B_TRUE; } else { ASSERT3U(err, ==, ENOENT); err = 0; } break; case ZFEATURE_TYPE_UINT64_ARRAY: { uint64_t int_size, num_int; uint64_t *data; err = zap_length(mos, ds->ds_object, spa_feature_table[f].fi_guid, &int_size, &num_int); if (err != 0) { ASSERT3U(err, ==, ENOENT); err = 0; break; } ASSERT3U(int_size, ==, sizeof (uint64_t)); data = kmem_alloc(int_size * num_int, KM_SLEEP); VERIFY0(zap_lookup(mos, ds->ds_object, spa_feature_table[f].fi_guid, int_size, num_int, data)); struct feature_type_uint64_array_arg *ftuaa = kmem_alloc(sizeof (*ftuaa), KM_SLEEP); ftuaa->length = num_int; ftuaa->array = data; ds->ds_feature[f] = ftuaa; break; } default: panic("Invalid zfeature type %d", spa_feature_table[f].fi_type); } return (err); } /* * We have to release the fsid synchronously or we risk that a subsequent * mount of the same dataset will fail to unique_insert the fsid. This * failure would manifest itself as the fsid of this dataset changing * between mounts which makes NFS clients quite unhappy. */ static void dsl_dataset_evict_sync(void *dbu) { dsl_dataset_t *ds = dbu; ASSERT(ds->ds_owner == NULL); unique_remove(ds->ds_fsid_guid); } static void dsl_dataset_evict_async(void *dbu) { dsl_dataset_t *ds = dbu; ASSERT(ds->ds_owner == NULL); ds->ds_dbuf = NULL; if (ds->ds_objset != NULL) dmu_objset_evict(ds->ds_objset); if (ds->ds_prev) { dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = NULL; } dsl_bookmark_fini_ds(ds); bplist_destroy(&ds->ds_pending_deadlist); if (dsl_deadlist_is_open(&ds->ds_deadlist)) dsl_deadlist_close(&ds->ds_deadlist); if (dsl_deadlist_is_open(&ds->ds_remap_deadlist)) dsl_deadlist_close(&ds->ds_remap_deadlist); if (ds->ds_dir) dsl_dir_async_rele(ds->ds_dir, ds); ASSERT(!list_link_active(&ds->ds_synced_link)); for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (dsl_dataset_feature_is_active(ds, f)) unload_zfeature(ds, f); } list_destroy(&ds->ds_prop_cbs); mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_opening_lock); mutex_destroy(&ds->ds_sendstream_lock); mutex_destroy(&ds->ds_remap_deadlist_lock); zfs_refcount_destroy(&ds->ds_longholds); rrw_destroy(&ds->ds_bp_rwlock); kmem_free(ds, sizeof (dsl_dataset_t)); } int dsl_dataset_get_snapname(dsl_dataset_t *ds) { dsl_dataset_phys_t *headphys; int err; dmu_buf_t *headdbuf; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; if (ds->ds_snapname[0]) return (0); if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) return (0); err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &headdbuf); if (err != 0) return (err); headphys = headdbuf->db_data; err = zap_value_search(dp->dp_meta_objset, headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); if (err != 0 && zfs_recover == B_TRUE) { err = 0; (void) snprintf(ds->ds_snapname, sizeof (ds->ds_snapname), "SNAPOBJ=%llu-ERR=%d", (unsigned long long)ds->ds_object, err); } dmu_buf_rele(headdbuf, FTAG); return (err); } int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; matchtype_t mt = 0; int err; if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) mt = MT_NORMALIZE; err = zap_lookup_norm(mos, snapobj, name, 8, 1, value, mt, NULL, 0, NULL); if (err == ENOTSUP && (mt & MT_NORMALIZE)) err = zap_lookup(mos, snapobj, name, 8, 1, value); return (err); } int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, boolean_t adj_cnt) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; matchtype_t mt = 0; int err; dsl_dir_snap_cmtime_update(ds->ds_dir, tx); if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) mt = MT_NORMALIZE; err = zap_remove_norm(mos, snapobj, name, mt, tx); if (err == ENOTSUP && (mt & MT_NORMALIZE)) err = zap_remove(mos, snapobj, name, tx); if (err == 0 && adj_cnt) dsl_fs_ss_count_adjust(ds->ds_dir, -1, DD_FIELD_SNAPSHOT_COUNT, tx); return (err); } boolean_t dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, const void *tag) { dmu_buf_t *dbuf = ds->ds_dbuf; boolean_t result = B_FALSE; if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset, ds->ds_object, DMU_BONUS_BLKID, tag)) { if (ds == dmu_buf_get_user(dbuf)) result = B_TRUE; else dmu_buf_rele(dbuf, tag); } return (result); } int dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, const void *tag, dsl_dataset_t **dsp) { objset_t *mos = dp->dp_meta_objset; dmu_buf_t *dbuf; dsl_dataset_t *ds; int err; dmu_object_info_t doi; ASSERT(dsl_pool_config_held(dp)); err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); if (err != 0) return (err); /* Make sure dsobj has the correct object type. */ dmu_object_info_from_db(dbuf, &doi); if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) { dmu_buf_rele(dbuf, tag); return (SET_ERROR(EINVAL)); } ds = dmu_buf_get_user(dbuf); if (ds == NULL) { dsl_dataset_t *winner = NULL; ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); ds->ds_dbuf = dbuf; ds->ds_object = dsobj; ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0; list_link_init(&ds->ds_synced_link); err = dsl_dir_hold_obj(dp, dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, &ds->ds_dir); if (err != 0) { kmem_free(ds, sizeof (dsl_dataset_t)); dmu_buf_rele(dbuf, tag); return (err); } mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_remap_deadlist_lock, NULL, MUTEX_DEFAULT, NULL); rrw_init(&ds->ds_bp_rwlock, B_FALSE); zfs_refcount_create(&ds->ds_longholds); bplist_create(&ds->ds_pending_deadlist); list_create(&ds->ds_sendstreams, sizeof (dmu_sendstatus_t), offsetof(dmu_sendstatus_t, dss_link)); list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t), offsetof(dsl_prop_cb_record_t, cbr_ds_node)); if (doi.doi_type == DMU_OTN_ZAP_METADATA) { spa_feature_t f; for (f = 0; f < SPA_FEATURES; f++) { if (!(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET)) continue; err = load_zfeature(mos, ds, f); } } if (!ds->ds_is_snapshot) { ds->ds_snapname[0] = '\0'; if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev); } err = dsl_bookmark_init_ds(ds); } else { if (zfs_flags & ZFS_DEBUG_SNAPNAMES) err = dsl_dataset_get_snapname(ds); if (err == 0 && dsl_dataset_phys(ds)->ds_userrefs_obj != 0) { err = zap_count( ds->ds_dir->dd_pool->dp_meta_objset, dsl_dataset_phys(ds)->ds_userrefs_obj, &ds->ds_userrefs); } } if (err == 0 && !ds->ds_is_snapshot) { err = dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &ds->ds_reserved); if (err == 0) { err = dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_REFQUOTA), &ds->ds_quota); } } else { ds->ds_reserved = ds->ds_quota = 0; } if (err == 0 && ds->ds_dir->dd_crypto_obj != 0 && ds->ds_is_snapshot && zap_contains(mos, dsobj, DS_FIELD_IVSET_GUID) != 0) { dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_8308_ENCRYPTION; } dsl_deadlist_open(&ds->ds_deadlist, mos, dsl_dataset_phys(ds)->ds_deadlist_obj); uint64_t remap_deadlist_obj = dsl_dataset_get_remap_deadlist_object(ds); if (remap_deadlist_obj != 0) { dsl_deadlist_open(&ds->ds_remap_deadlist, mos, remap_deadlist_obj); } dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync, dsl_dataset_evict_async, &ds->ds_dbuf); if (err == 0) winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu); if (err != 0 || winner != NULL) { bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); if (dsl_deadlist_is_open(&ds->ds_remap_deadlist)) dsl_deadlist_close(&ds->ds_remap_deadlist); dsl_bookmark_fini_ds(ds); if (ds->ds_prev) dsl_dataset_rele(ds->ds_prev, ds); dsl_dir_rele(ds->ds_dir, ds); for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (dsl_dataset_feature_is_active(ds, f)) unload_zfeature(ds, f); } list_destroy(&ds->ds_prop_cbs); list_destroy(&ds->ds_sendstreams); mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_opening_lock); mutex_destroy(&ds->ds_sendstream_lock); mutex_destroy(&ds->ds_remap_deadlist_lock); zfs_refcount_destroy(&ds->ds_longholds); rrw_destroy(&ds->ds_bp_rwlock); kmem_free(ds, sizeof (dsl_dataset_t)); if (err != 0) { dmu_buf_rele(dbuf, tag); return (err); } ds = winner; } else { ds->ds_fsid_guid = unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid); if (ds->ds_fsid_guid != dsl_dataset_phys(ds)->ds_fsid_guid) { zfs_dbgmsg("ds_fsid_guid changed from " "%llx to %llx for pool %s dataset id %llu", (long long) dsl_dataset_phys(ds)->ds_fsid_guid, (long long)ds->ds_fsid_guid, spa_name(dp->dp_spa), (u_longlong_t)dsobj); } } } ASSERT3P(ds->ds_dbuf, ==, dbuf); ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data); ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 || spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); *dsp = ds; return (0); } int dsl_dataset_create_key_mapping(dsl_dataset_t *ds) { dsl_dir_t *dd = ds->ds_dir; if (dd->dd_crypto_obj == 0) return (0); return (spa_keystore_create_mapping(dd->dd_pool->dp_spa, ds, ds, &ds->ds_key_mapping)); } int dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp) { int err; err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); if (err != 0) return (err); ASSERT3P(*dsp, !=, NULL); if (flags & DS_HOLD_FLAG_DECRYPT) { err = dsl_dataset_create_key_mapping(*dsp); if (err != 0) dsl_dataset_rele(*dsp, tag); } return (err); } int dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp) { dsl_dir_t *dd; const char *snapname; uint64_t obj; int err = 0; dsl_dataset_t *ds; err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); if (err != 0) return (err); ASSERT(dsl_pool_config_held(dp)); obj = dsl_dir_phys(dd)->dd_head_dataset_obj; if (obj != 0) err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds); else err = SET_ERROR(ENOENT); /* we may be looking for a snapshot */ if (err == 0 && snapname != NULL) { dsl_dataset_t *snap_ds; if (*snapname++ != '@') { dsl_dataset_rele_flags(ds, flags, tag); dsl_dir_rele(dd, FTAG); return (SET_ERROR(ENOENT)); } dprintf("looking for snapshot '%s'\n", snapname); err = dsl_dataset_snap_lookup(ds, snapname, &obj); if (err == 0) { err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &snap_ds); } dsl_dataset_rele_flags(ds, flags, tag); if (err == 0) { mutex_enter(&snap_ds->ds_lock); if (snap_ds->ds_snapname[0] == 0) (void) strlcpy(snap_ds->ds_snapname, snapname, sizeof (snap_ds->ds_snapname)); mutex_exit(&snap_ds->ds_lock); ds = snap_ds; } } if (err == 0) *dsp = ds; dsl_dir_rele(dd, FTAG); return (err); } int dsl_dataset_hold(dsl_pool_t *dp, const char *name, const void *tag, dsl_dataset_t **dsp) { return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp)); } static int dsl_dataset_own_obj_impl(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags, const void *tag, boolean_t override, dsl_dataset_t **dsp) { int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp); if (err != 0) return (err); if (!dsl_dataset_tryown(*dsp, tag, override)) { dsl_dataset_rele_flags(*dsp, flags, tag); *dsp = NULL; return (SET_ERROR(EBUSY)); } return (0); } int dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp) { return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_FALSE, dsp)); } int dsl_dataset_own_obj_force(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp) { return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_TRUE, dsp)); } static int dsl_dataset_own_impl(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, const void *tag, boolean_t override, dsl_dataset_t **dsp) { int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp); if (err != 0) return (err); if (!dsl_dataset_tryown(*dsp, tag, override)) { dsl_dataset_rele_flags(*dsp, flags, tag); return (SET_ERROR(EBUSY)); } return (0); } int dsl_dataset_own_force(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp) { return (dsl_dataset_own_impl(dp, name, flags, tag, B_TRUE, dsp)); } int dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp) { return (dsl_dataset_own_impl(dp, name, flags, tag, B_FALSE, dsp)); } /* * See the comment above dsl_pool_hold() for details. In summary, a long * hold is used to prevent destruction of a dataset while the pool hold * is dropped, allowing other concurrent operations (e.g. spa_sync()). * * The dataset and pool must be held when this function is called. After it * is called, the pool hold may be released while the dataset is still held * and accessed. */ void dsl_dataset_long_hold(dsl_dataset_t *ds, const void *tag) { ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); (void) zfs_refcount_add(&ds->ds_longholds, tag); } void dsl_dataset_long_rele(dsl_dataset_t *ds, const void *tag) { (void) zfs_refcount_remove(&ds->ds_longholds, tag); } /* Return B_TRUE if there are any long holds on this dataset. */ boolean_t dsl_dataset_long_held(dsl_dataset_t *ds) { return (!zfs_refcount_is_zero(&ds->ds_longholds)); } void dsl_dataset_name(dsl_dataset_t *ds, char *name) { if (ds == NULL) { (void) strlcpy(name, "mos", ZFS_MAX_DATASET_NAME_LEN); } else { dsl_dir_name(ds->ds_dir, name); VERIFY0(dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); /* * We use a "recursive" mutex so that we * can call dprintf_ds() with ds_lock held. */ if (!MUTEX_HELD(&ds->ds_lock)) { mutex_enter(&ds->ds_lock); VERIFY3U(strlcat(name, ds->ds_snapname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); mutex_exit(&ds->ds_lock); } else { VERIFY3U(strlcat(name, ds->ds_snapname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); } } } } int dsl_dataset_namelen(dsl_dataset_t *ds) { VERIFY0(dsl_dataset_get_snapname(ds)); mutex_enter(&ds->ds_lock); int len = strlen(ds->ds_snapname); mutex_exit(&ds->ds_lock); /* add '@' if ds is a snap */ if (len > 0) len++; len += dsl_dir_namelen(ds->ds_dir); return (len); } void dsl_dataset_rele(dsl_dataset_t *ds, const void *tag) { dmu_buf_rele(ds->ds_dbuf, tag); } void dsl_dataset_remove_key_mapping(dsl_dataset_t *ds) { dsl_dir_t *dd = ds->ds_dir; if (dd == NULL || dd->dd_crypto_obj == 0) return; (void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa, ds->ds_object, ds); } void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, const void *tag) { if (flags & DS_HOLD_FLAG_DECRYPT) dsl_dataset_remove_key_mapping(ds); dsl_dataset_rele(ds, tag); } void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, const void *tag) { ASSERT3P(ds->ds_owner, ==, tag); ASSERT(ds->ds_dbuf != NULL); mutex_enter(&ds->ds_lock); ds->ds_owner = NULL; mutex_exit(&ds->ds_lock); dsl_dataset_long_rele(ds, tag); dsl_dataset_rele_flags(ds, flags, tag); } boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, const void *tag, boolean_t override) { boolean_t gotit = FALSE; ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); mutex_enter(&ds->ds_lock); if (ds->ds_owner == NULL && (override || !(DS_IS_INCONSISTENT(ds) || (dsl_dataset_feature_is_active(ds, SPA_FEATURE_REDACTED_DATASETS) && !zfs_allow_redacted_dataset_mount)))) { ds->ds_owner = tag; dsl_dataset_long_hold(ds, tag); gotit = TRUE; } mutex_exit(&ds->ds_lock); return (gotit); } boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds) { boolean_t rv; mutex_enter(&ds->ds_lock); rv = (ds->ds_owner != NULL); mutex_exit(&ds->ds_lock); return (rv); } static boolean_t zfeature_active(spa_feature_t f, void *arg) { switch (spa_feature_table[f].fi_type) { case ZFEATURE_TYPE_BOOLEAN: { boolean_t val = (boolean_t)(uintptr_t)arg; ASSERT(val == B_FALSE || val == B_TRUE); return (val); } case ZFEATURE_TYPE_UINT64_ARRAY: /* * In this case, arg is a uint64_t array. The feature is active * if the array is non-null. */ return (arg != NULL); default: panic("Invalid zfeature type %d", spa_feature_table[f].fi_type); return (B_FALSE); } } boolean_t dsl_dataset_feature_is_active(dsl_dataset_t *ds, spa_feature_t f) { return (zfeature_active(f, ds->ds_feature[f])); } /* * The buffers passed out by this function are references to internal buffers; * they should not be freed by callers of this function, and they should not be * used after the dataset has been released. */ boolean_t dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds, spa_feature_t f, uint64_t *outlength, uint64_t **outp) { VERIFY(spa_feature_table[f].fi_type & ZFEATURE_TYPE_UINT64_ARRAY); if (!dsl_dataset_feature_is_active(ds, f)) { return (B_FALSE); } struct feature_type_uint64_array_arg *ftuaa = ds->ds_feature[f]; *outp = ftuaa->array; *outlength = ftuaa->length; return (B_TRUE); } void dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, void *arg, dmu_tx_t *tx) { spa_t *spa = dmu_tx_pool(tx)->dp_spa; objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; uint64_t zero = 0; VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET); spa_feature_incr(spa, f, tx); dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); switch (spa_feature_table[f].fi_type) { case ZFEATURE_TYPE_BOOLEAN: ASSERT3S((boolean_t)(uintptr_t)arg, ==, B_TRUE); VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid, sizeof (zero), 1, &zero, tx)); break; case ZFEATURE_TYPE_UINT64_ARRAY: { struct feature_type_uint64_array_arg *ftuaa = arg; VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid, sizeof (uint64_t), ftuaa->length, ftuaa->array, tx)); break; } default: panic("Invalid zfeature type %d", spa_feature_table[f].fi_type); } } static void dsl_dataset_deactivate_feature_impl(dsl_dataset_t *ds, spa_feature_t f, dmu_tx_t *tx) { spa_t *spa = dmu_tx_pool(tx)->dp_spa; objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; uint64_t dsobj = ds->ds_object; VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET); VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx)); spa_feature_decr(spa, f, tx); ds->ds_feature[f] = NULL; } void dsl_dataset_deactivate_feature(dsl_dataset_t *ds, spa_feature_t f, dmu_tx_t *tx) { unload_zfeature(ds, f); dsl_dataset_deactivate_feature_impl(ds, f, tx); } uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; uint64_t dsobj; objset_t *mos = dp->dp_meta_objset; if (origin == NULL) origin = dp->dp_origin_snap; ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0); dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; memset(dsphys, 0, sizeof (dsl_dataset_phys_t)); dsphys->ds_dir_obj = dd->dd_object; dsphys->ds_flags = flags; dsphys->ds_fsid_guid = unique_create(); (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_snapnames_zapobj = zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; if (origin == NULL) { dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); } else { dsl_dataset_t *ohds; /* head of the origin snapshot */ dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = dsl_dataset_phys(origin)->ds_creation_txg; dsphys->ds_referenced_bytes = dsl_dataset_phys(origin)->ds_referenced_bytes; dsphys->ds_compressed_bytes = dsl_dataset_phys(origin)->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = dsl_dataset_phys(origin)->ds_uncompressed_bytes; rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG); dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp; rrw_exit(&origin->ds_bp_rwlock, FTAG); /* * Inherit flags that describe the dataset's contents * (INCONSISTENT) or properties (Case Insensitive). */ dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags & (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET); for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (zfeature_active(f, origin->ds_feature[f])) { dsl_dataset_activate_feature(dsobj, f, origin->ds_feature[f], tx); } } dmu_buf_will_dirty(origin->ds_dbuf, tx); dsl_dataset_phys(origin)->ds_num_children++; VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj, FTAG, &ohds)); dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); dsl_dataset_rele(ohds, FTAG); if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) { dsl_dataset_phys(origin)->ds_next_clones_obj = zap_create(mos, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(mos, dsl_dataset_phys(origin)->ds_next_clones_obj, dsobj, tx)); } dmu_buf_will_dirty(dd->dd_dbuf, tx); dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object; if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); dsl_dir_phys(origin->ds_dir)->dd_clones = zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(mos, dsl_dir_phys(origin->ds_dir)->dd_clones, dsobj, tx)); } } /* handle encryption */ dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx); if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; dmu_buf_rele(dbuf, FTAG); dmu_buf_will_dirty(dd->dd_dbuf, tx); dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj; return (dsobj); } static void dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) { objset_t *os; VERIFY0(dmu_objset_from_ds(ds, &os)); if (memcmp(&os->os_zil_header, &zero_zil, sizeof (zero_zil)) != 0) { dsl_pool_t *dp = ds->ds_dir->dd_pool; zio_t *zio; memset(&os->os_zil_header, 0, sizeof (os->os_zil_header)); if (os->os_encrypted) os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dsl_dataset_sync(ds, zio, tx); VERIFY0(zio_wait(zio)); /* dsl_dataset_sync_done will drop this reference. */ dmu_buf_add_ref(ds->ds_dbuf, ds); dsl_dataset_sync_done(ds, tx); } } uint64_t dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dsl_crypto_params_t *dcp, dmu_tx_t *tx) { dsl_pool_t *dp = pdd->dd_pool; uint64_t dsobj, ddobj; dsl_dir_t *dd; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(lastname[0] != '@'); /* * Filesystems will eventually have their origin set to dp_origin_snap, * but that's taken care of in dsl_dataset_create_sync_dd. When * creating a filesystem, this function is called with origin equal to * NULL. */ if (origin != NULL) ASSERT3P(origin, !=, dp->dp_origin_snap); ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp, flags & ~DS_CREATE_FLAG_NODIRTY, tx); dsl_deleg_set_create_perms(dd, tx, cr); /* * If we are creating a clone and the livelist feature is enabled, * add the entry DD_FIELD_LIVELIST to ZAP. */ if (origin != NULL && spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LIVELIST)) { objset_t *mos = dd->dd_pool->dp_meta_objset; dsl_dir_zapify(dd, tx); uint64_t obj = dsl_deadlist_alloc(mos, tx); VERIFY0(zap_add(mos, dd->dd_object, DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj, tx)); spa_feature_incr(dp->dp_spa, SPA_FEATURE_LIVELIST, tx); } /* * Since we're creating a new node we know it's a leaf, so we can * initialize the counts if the limit feature is active. */ if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { uint64_t cnt = 0; objset_t *os = dd->dd_pool->dp_meta_objset; dsl_dir_zapify(dd, tx); VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (cnt), 1, &cnt, tx)); VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (cnt), 1, &cnt, tx)); } dsl_dir_rele(dd, FTAG); /* * If we are creating a clone, make sure we zero out any stale * data from the origin snapshots zil header. */ if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { dsl_dataset_t *ds; VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_zero_zil(ds, tx); dsl_dataset_rele(ds, FTAG); } return (dsobj); } /* * The unique space in the head dataset can be calculated by subtracting * the space used in the most recent snapshot, that is still being used * in this file system, from the space currently in use. To figure out * the space in the most recent snapshot still in use, we need to take * the total space used in the snapshot and subtract out the space that * has been freed up since the snapshot was taken. */ void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) { uint64_t mrs_used; uint64_t dlused, dlcomp, dluncomp; ASSERT(!ds->ds_is_snapshot); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; else mrs_used = 0; dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); ASSERT3U(dlused, <=, mrs_used); dsl_dataset_phys(ds)->ds_unique_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused); if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; } void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t count __maybe_unused; int err; ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2); err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, obj, tx); /* * The err should not be ENOENT, but a bug in a previous version * of the code could cause upgrade_clones_cb() to not set * ds_next_snap_obj when it should, leading to a missing entry. * If we knew that the pool was created after * SPA_VERSION_NEXT_CLONES, we could assert that it isn't * ENOENT. However, at least we can check that we don't have * too many entries in the next_clones_obj even after failing to * remove this one. */ if (err != ENOENT) VERIFY0(err); ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, &count)); ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2); } blkptr_t * dsl_dataset_get_blkptr(dsl_dataset_t *ds) { return (&dsl_dataset_phys(ds)->ds_bp); } spa_t * dsl_dataset_get_spa(dsl_dataset_t *ds) { return (ds->ds_dir->dd_pool->dp_spa); } void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_pool_t *dp; if (ds == NULL) /* this is the meta-objset */ return; ASSERT(ds->ds_objset != NULL); if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) panic("dirtying snapshot!"); /* Must not dirty a dataset in the same txg where it got snapshotted. */ ASSERT3U(tx->tx_txg, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); dp = ds->ds_dir->dd_pool; if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { objset_t *os = ds->ds_objset; /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, ds); /* if this dataset is encrypted, grab a reference to the DCK */ if (ds->ds_dir->dd_crypto_obj != 0 && !os->os_raw_receive && !os->os_next_write_raw[tx->tx_txg & TXG_MASK]) { ASSERT3P(ds->ds_key_mapping, !=, NULL); key_mapping_add_ref(ds->ds_key_mapping, ds); } } } static int dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) { uint64_t asize; if (!dmu_tx_is_syncing(tx)) return (0); /* * If there's an fs-only reservation, any blocks that might become * owned by the snapshot dataset must be accommodated by space * outside of the reservation. */ ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved); if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) return (SET_ERROR(ENOSPC)); /* * Propagate any reserved space for this snapshot to other * snapshot checks in this sync group. */ if (asize > 0) dsl_dir_willuse_space(ds->ds_dir, asize, tx); return (0); } int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr, proc_t *proc) { int error; uint64_t value; ds->ds_trysnap_txg = tx->tx_txg; if (!dmu_tx_is_syncing(tx)) return (0); /* * We don't allow multiple snapshots of the same txg. If there * is already one, try again. */ if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) return (SET_ERROR(EAGAIN)); /* * Check for conflicting snapshot name. */ error = dsl_dataset_snap_lookup(ds, snapname, &value); if (error == 0) return (SET_ERROR(EEXIST)); if (error != ENOENT) return (error); /* * We don't allow taking snapshots of inconsistent datasets, such as * those into which we are currently receiving. However, if we are * creating this snapshot as part of a receive, this check will be * executed atomically with respect to the completion of the receive * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this * case we ignore this, knowing it will be fixed up for us shortly in * dmu_recv_end_sync(). */ if (!recv && DS_IS_INCONSISTENT(ds)) return (SET_ERROR(EBUSY)); /* * Skip the check for temporary snapshots or if we have already checked * the counts in dsl_dataset_snapshot_check. This means we really only * check the count here when we're receiving a stream. */ if (cnt != 0 && cr != NULL) { error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr, proc); if (error != 0) return (error); } error = dsl_dataset_snapshot_reserve_space(ds, tx); if (error != 0) return (error); return (0); } int dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) { dsl_dataset_snapshot_arg_t *ddsa = arg; dsl_pool_t *dp = dmu_tx_pool(tx); nvpair_t *pair; int rv = 0; /* * Pre-compute how many total new snapshots will be created for each * level in the tree and below. This is needed for validating the * snapshot limit when either taking a recursive snapshot or when * taking multiple snapshots. * * The problem is that the counts are not actually adjusted when * we are checking, only when we finally sync. For a single snapshot, * this is easy, the count will increase by 1 at each node up the tree, * but its more complicated for the recursive/multiple snapshot case. * * The dsl_fs_ss_limit_check function does recursively check the count * at each level up the tree but since it is validating each snapshot * independently we need to be sure that we are validating the complete * count for the entire set of snapshots. We do this by rolling up the * counts for each component of the name into an nvlist and then * checking each of those cases with the aggregated count. * * This approach properly handles not only the recursive snapshot * case (where we get all of those on the ddsa_snaps list) but also * the sibling case (e.g. snapshot a/b and a/c so that we will also * validate the limit on 'a' using a count of 2). * * We validate the snapshot names in the third loop and only report * name errors once. */ if (dmu_tx_is_syncing(tx)) { char *nm; nvlist_t *cnt_track = NULL; cnt_track = fnvlist_alloc(); nm = kmem_alloc(MAXPATHLEN, KM_SLEEP); /* Rollup aggregated counts into the cnt_track list */ for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { char *pdelim; uint64_t val; (void) strlcpy(nm, nvpair_name(pair), MAXPATHLEN); pdelim = strchr(nm, '@'); if (pdelim == NULL) continue; *pdelim = '\0'; do { if (nvlist_lookup_uint64(cnt_track, nm, &val) == 0) { /* update existing entry */ fnvlist_add_uint64(cnt_track, nm, val + 1); } else { /* add to list */ fnvlist_add_uint64(cnt_track, nm, 1); } pdelim = strrchr(nm, '/'); if (pdelim != NULL) *pdelim = '\0'; } while (pdelim != NULL); } kmem_free(nm, MAXPATHLEN); /* Check aggregated counts at each level */ for (pair = nvlist_next_nvpair(cnt_track, NULL); pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) { int error = 0; char *name; uint64_t cnt = 0; dsl_dataset_t *ds; name = nvpair_name(pair); cnt = fnvpair_value_uint64(pair); ASSERT(cnt > 0); error = dsl_dataset_hold(dp, name, FTAG, &ds); if (error == 0) { error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, ZFS_PROP_SNAPSHOT_LIMIT, NULL, ddsa->ddsa_cr, ddsa->ddsa_proc); dsl_dataset_rele(ds, FTAG); } if (error != 0) { if (ddsa->ddsa_errors != NULL) fnvlist_add_int32(ddsa->ddsa_errors, name, error); rv = error; /* only report one error for this check */ break; } } nvlist_free(cnt_track); } for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { int error = 0; dsl_dataset_t *ds; char *name, *atp = NULL; char dsname[ZFS_MAX_DATASET_NAME_LEN]; name = nvpair_name(pair); if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN) error = SET_ERROR(ENAMETOOLONG); if (error == 0) { atp = strchr(name, '@'); if (atp == NULL) error = SET_ERROR(EINVAL); if (error == 0) (void) strlcpy(dsname, name, atp - name + 1); } if (error == 0) error = dsl_dataset_hold(dp, dsname, FTAG, &ds); if (error == 0) { /* passing 0/NULL skips dsl_fs_ss_limit_check */ error = dsl_dataset_snapshot_check_impl(ds, atp + 1, tx, B_FALSE, 0, NULL, NULL); dsl_dataset_rele(ds, FTAG); } if (error != 0) { if (ddsa->ddsa_errors != NULL) { fnvlist_add_int32(ddsa->ddsa_errors, name, error); } rv = error; } } return (rv); } void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; uint64_t dsobj, crtxg; objset_t *mos = dp->dp_meta_objset; static zil_header_t zero_zil __maybe_unused; objset_t *os __maybe_unused; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); /* * If we are on an old pool, the zil must not be active, in which * case it will be zeroed. Usually zil_suspend() accomplishes this. */ ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || dmu_objset_from_ds(ds, &os) != 0 || memcmp(&os->os_phys->os_zil_header, &zero_zil, sizeof (zero_zil)) == 0); /* Should not snapshot a dirty dataset. */ ASSERT(!txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, ds, tx->tx_txg)); dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); /* * The origin's ds_creation_txg has to be < TXG_INITIAL */ if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) crtxg = 1; else crtxg = tx->tx_txg; dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; memset(dsphys, 0, sizeof (dsl_dataset_phys_t)); dsphys->ds_dir_obj = ds->ds_dir->dd_object; dsphys->ds_fsid_guid = unique_create(); (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; dsphys->ds_next_snap_obj = ds->ds_object; dsphys->ds_num_children = 1; dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = crtxg; dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes; dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = dsl_dataset_phys(ds)->ds_uncompressed_bytes; dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags; rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp; rrw_exit(&ds->ds_bp_rwlock, FTAG); dmu_buf_rele(dbuf, FTAG); for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (zfeature_active(f, ds->ds_feature[f])) { dsl_dataset_activate_feature(dsobj, f, ds->ds_feature[f], tx); } } ASSERT3U(ds->ds_prev != 0, ==, dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); if (ds->ds_prev) { uint64_t next_clones_obj = dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj; ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object || dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1); if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, dsl_dataset_phys(ds->ds_prev)->ds_creation_txg); dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj; } else if (next_clones_obj != 0) { dsl_dataset_remove_from_next_clones(ds->ds_prev, dsphys->ds_next_snap_obj, tx); VERIFY0(zap_add_int(mos, next_clones_obj, dsobj, tx)); } } /* * If we have a reference-reservation on this dataset, we will * need to increase the amount of refreservation being charged * since our unique space is going to zero. */ if (ds->ds_reserved) { int64_t delta; ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); } dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_open(&ds->ds_deadlist, mos, dsl_dataset_phys(ds)->ds_deadlist_obj); dsl_deadlist_add_key(&ds->ds_deadlist, dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); dsl_bookmark_snapshotted(ds, tx); if (dsl_dataset_remap_deadlist_exists(ds)) { uint64_t remap_deadlist_obj = dsl_dataset_get_remap_deadlist_object(ds); /* * Move the remap_deadlist to the snapshot. The head * will create a new remap deadlist on demand, from * dsl_dataset_block_remapped(). */ dsl_dataset_unset_remap_deadlist_object(ds, tx); dsl_deadlist_close(&ds->ds_remap_deadlist); dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); VERIFY0(zap_add(mos, dsobj, DS_FIELD_REMAP_DEADLIST, sizeof (remap_deadlist_obj), 1, &remap_deadlist_obj, tx)); } /* * Create a ivset guid for this snapshot if the dataset is * encrypted. This may be overridden by a raw receive. A * previous implementation of this code did not have this * field as part of the on-disk format for ZFS encryption * (see errata #4). As part of the remediation for this * issue, we ask the user to enable the bookmark_v2 feature * which is now a dependency of the encryption feature. We * use this as a heuristic to determine when the user has * elected to correct any datasets created with the old code. * As a result, we only do this step if the bookmark_v2 * feature is enabled, which limits the number of states a * given pool / dataset can be in with regards to terms of * correcting the issue. */ if (ds->ds_dir->dd_crypto_obj != 0 && spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2)) { uint64_t ivset_guid = unique_create(); dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); VERIFY0(zap_add(mos, dsobj, DS_FIELD_IVSET_GUID, sizeof (ivset_guid), 1, &ivset_guid, tx)); } ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg); dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj; dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg; dsl_dataset_phys(ds)->ds_unique_bytes = 0; if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; VERIFY0(zap_add(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj, snapname, 8, 1, &dsobj, tx)); if (ds->ds_prev) dsl_dataset_rele(ds->ds_prev, ds); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); dsl_scan_ds_snapshotted(ds, tx); dsl_dir_snap_cmtime_update(ds->ds_dir, tx); - spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " "); + if (zfs_snapshot_history_enabled) + spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " "); } void dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_snapshot_arg_t *ddsa = arg; dsl_pool_t *dp = dmu_tx_pool(tx); nvpair_t *pair; for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { dsl_dataset_t *ds; char *name, *atp; char dsname[ZFS_MAX_DATASET_NAME_LEN]; name = nvpair_name(pair); atp = strchr(name, '@'); (void) strlcpy(dsname, name, atp - name + 1); VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); if (ddsa->ddsa_props != NULL) { dsl_props_set_sync_impl(ds->ds_prev, ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); } dsl_dataset_rele(ds, FTAG); } } /* * The snapshots must all be in the same pool. * All-or-nothing: if there are any failures, nothing will be modified. */ int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) { dsl_dataset_snapshot_arg_t ddsa; nvpair_t *pair; boolean_t needsuspend; int error; spa_t *spa; char *firstname; nvlist_t *suspended = NULL; pair = nvlist_next_nvpair(snaps, NULL); if (pair == NULL) return (0); firstname = nvpair_name(pair); error = spa_open(firstname, &spa, FTAG); if (error != 0) return (error); needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); spa_close(spa, FTAG); if (needsuspend) { suspended = fnvlist_alloc(); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *snapname = nvpair_name(pair); char *atp; void *cookie; atp = strchr(snapname, '@'); if (atp == NULL) { error = SET_ERROR(EINVAL); break; } (void) strlcpy(fsname, snapname, atp - snapname + 1); error = zil_suspend(fsname, &cookie); if (error != 0) break; fnvlist_add_uint64(suspended, fsname, (uintptr_t)cookie); } } ddsa.ddsa_snaps = snaps; ddsa.ddsa_props = props; ddsa.ddsa_errors = errors; ddsa.ddsa_cr = CRED(); ddsa.ddsa_proc = curproc; if (error == 0) { error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, dsl_dataset_snapshot_sync, &ddsa, fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL); } if (suspended != NULL) { for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; pair = nvlist_next_nvpair(suspended, pair)) { zil_resume((void *)(uintptr_t) fnvpair_value_uint64(pair)); } fnvlist_free(suspended); } if (error == 0) { for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { zvol_create_minor(nvpair_name(pair)); } } return (error); } typedef struct dsl_dataset_snapshot_tmp_arg { const char *ddsta_fsname; const char *ddsta_snapname; minor_t ddsta_cleanup_minor; const char *ddsta_htag; } dsl_dataset_snapshot_tmp_arg_t; static int dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) { dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error; error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); if (error != 0) return (error); /* NULL cred means no limit check for tmp snapshot */ error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, tx, B_FALSE, 0, NULL, NULL); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENOTSUP)); } error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, B_TRUE, tx); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } dsl_dataset_rele(ds, FTAG); return (0); } static void dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds = NULL; VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); dsl_dataset_rele(ds, FTAG); } int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, minor_t cleanup_minor, const char *htag) { dsl_dataset_snapshot_tmp_arg_t ddsta; int error; spa_t *spa; boolean_t needsuspend; void *cookie; ddsta.ddsta_fsname = fsname; ddsta.ddsta_snapname = snapname; ddsta.ddsta_cleanup_minor = cleanup_minor; ddsta.ddsta_htag = htag; error = spa_open(fsname, &spa, FTAG); if (error != 0) return (error); needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); spa_close(spa, FTAG); if (needsuspend) { error = zil_suspend(fsname, &cookie); if (error != 0) return (error); } error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED); if (needsuspend) zil_resume(cookie); return (error); } void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(ds->ds_objset != NULL); ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0); /* * in case we had to change ds_fsid_guid when we opened it, * sync it out now. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid; if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) { VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1, &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx)); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1, &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx)); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1, &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx)); ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0; ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0; ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0; } dmu_objset_sync(ds->ds_objset, zio, tx); for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (zfeature_active(f, ds->ds_feature_activation[f])) { if (zfeature_active(f, ds->ds_feature[f])) continue; dsl_dataset_activate_feature(ds->ds_object, f, ds->ds_feature_activation[f], tx); ds->ds_feature[f] = ds->ds_feature_activation[f]; } } } /* * Check if the percentage of blocks shared between the clone and the * snapshot (as opposed to those that are clone only) is below a certain * threshold */ static boolean_t dsl_livelist_should_disable(dsl_dataset_t *ds) { uint64_t used, referenced; int percent_shared; used = dsl_dir_get_usedds(ds->ds_dir); referenced = dsl_get_referenced(ds); ASSERT3U(referenced, >=, 0); ASSERT3U(used, >=, 0); if (referenced == 0) return (B_FALSE); percent_shared = (100 * (referenced - used)) / referenced; if (percent_shared <= zfs_livelist_min_percent_shared) return (B_TRUE); return (B_FALSE); } /* * Check if it is possible to combine two livelist entries into one. * This is the case if the combined number of 'live' blkptrs (ALLOCs that * don't have a matching FREE) is under the maximum sublist size. * We check this by subtracting twice the total number of frees from the total * number of blkptrs. FREEs are counted twice because each FREE blkptr * will cancel out an ALLOC blkptr when the livelist is processed. */ static boolean_t dsl_livelist_should_condense(dsl_deadlist_entry_t *first, dsl_deadlist_entry_t *next) { uint64_t total_free = first->dle_bpobj.bpo_phys->bpo_num_freed + next->dle_bpobj.bpo_phys->bpo_num_freed; uint64_t total_entries = first->dle_bpobj.bpo_phys->bpo_num_blkptrs + next->dle_bpobj.bpo_phys->bpo_num_blkptrs; if ((total_entries - (2 * total_free)) < zfs_livelist_max_entries) return (B_TRUE); return (B_FALSE); } typedef struct try_condense_arg { spa_t *spa; dsl_dataset_t *ds; } try_condense_arg_t; /* * Iterate over the livelist entries, searching for a pair to condense. * A nonzero return value means stop, 0 means keep looking. */ static int dsl_livelist_try_condense(void *arg, dsl_deadlist_entry_t *first) { try_condense_arg_t *tca = arg; spa_t *spa = tca->spa; dsl_dataset_t *ds = tca->ds; dsl_deadlist_t *ll = &ds->ds_dir->dd_livelist; dsl_deadlist_entry_t *next; /* The condense thread has not yet been created at import */ if (spa->spa_livelist_condense_zthr == NULL) return (1); /* A condense is already in progress */ if (spa->spa_to_condense.ds != NULL) return (1); next = AVL_NEXT(&ll->dl_tree, &first->dle_node); /* The livelist has only one entry - don't condense it */ if (next == NULL) return (1); /* Next is the newest entry - don't condense it */ if (AVL_NEXT(&ll->dl_tree, &next->dle_node) == NULL) return (1); /* This pair is not ready to condense but keep looking */ if (!dsl_livelist_should_condense(first, next)) return (0); /* * Add a ref to prevent the dataset from being evicted while * the condense zthr or synctask are running. Ref will be * released at the end of the condense synctask */ dmu_buf_add_ref(ds->ds_dbuf, spa); spa->spa_to_condense.ds = ds; spa->spa_to_condense.first = first; spa->spa_to_condense.next = next; spa->spa_to_condense.syncing = B_FALSE; spa->spa_to_condense.cancelled = B_FALSE; zthr_wakeup(spa->spa_livelist_condense_zthr); return (1); } static void dsl_flush_pending_livelist(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_dir_t *dd = ds->ds_dir; spa_t *spa = ds->ds_dir->dd_pool->dp_spa; dsl_deadlist_entry_t *last = dsl_deadlist_last(&dd->dd_livelist); /* Check if we need to add a new sub-livelist */ if (last == NULL) { /* The livelist is empty */ dsl_deadlist_add_key(&dd->dd_livelist, tx->tx_txg - 1, tx); } else if (spa_sync_pass(spa) == 1) { /* * Check if the newest entry is full. If it is, make a new one. * We only do this once per sync because we could overfill a * sublist in one sync pass and don't want to add another entry * for a txg that is already represented. This ensures that * blkptrs born in the same txg are stored in the same sublist. */ bpobj_t bpobj = last->dle_bpobj; uint64_t all = bpobj.bpo_phys->bpo_num_blkptrs; uint64_t free = bpobj.bpo_phys->bpo_num_freed; uint64_t alloc = all - free; if (alloc > zfs_livelist_max_entries) { dsl_deadlist_add_key(&dd->dd_livelist, tx->tx_txg - 1, tx); } } /* Insert each entry into the on-disk livelist */ bplist_iterate(&dd->dd_pending_allocs, dsl_deadlist_insert_alloc_cb, &dd->dd_livelist, tx); bplist_iterate(&dd->dd_pending_frees, dsl_deadlist_insert_free_cb, &dd->dd_livelist, tx); /* Attempt to condense every pair of adjacent entries */ try_condense_arg_t arg = { .spa = spa, .ds = ds }; dsl_deadlist_iterate(&dd->dd_livelist, dsl_livelist_try_condense, &arg); } void dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) { objset_t *os = ds->ds_objset; bplist_iterate(&ds->ds_pending_deadlist, dsl_deadlist_insert_alloc_cb, &ds->ds_deadlist, tx); if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist)) { dsl_flush_pending_livelist(ds, tx); if (dsl_livelist_should_disable(ds)) { dsl_dir_remove_livelist(ds->ds_dir, tx, B_TRUE); } } dsl_bookmark_sync_done(ds, tx); multilist_destroy(&os->os_synced_dnodes); if (os->os_encrypted) os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE; else ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]); ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx))); dmu_buf_rele(ds->ds_dbuf, ds); } int get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val) { uint64_t count = 0; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; zap_cursor_t zc; zap_attribute_t za; ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); /* * There may be missing entries in ds_next_clones_obj * due to a bug in a previous version of the code. * Only trust it if it has the right number of entries. */ if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, &count)); } if (count != dsl_dataset_phys(ds)->ds_num_children - 1) { return (SET_ERROR(ENOENT)); } for (zap_cursor_init(&zc, mos, dsl_dataset_phys(ds)->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; char buf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, za.za_first_integer, FTAG, &clone)); dsl_dir_name(clone->ds_dir, buf); fnvlist_add_boolean(val, buf); dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(&zc); return (0); } void get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) { nvlist_t *propval = fnvlist_alloc(); nvlist_t *val = fnvlist_alloc(); if (get_clones_stat_impl(ds, val) == 0) { fnvlist_add_nvlist(propval, ZPROP_VALUE, val); fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); } nvlist_free(val); nvlist_free(propval); } static char * get_receive_resume_token_impl(dsl_dataset_t *ds) { if (!dsl_dataset_has_resume_receive_state(ds)) return (NULL); dsl_pool_t *dp = ds->ds_dir->dd_pool; char *str; void *packed; uint8_t *compressed; uint64_t val; nvlist_t *token_nv = fnvlist_alloc(); size_t packed_size, compressed_size; if (zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) { fnvlist_add_uint64(token_nv, "fromguid", val); } if (zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) { fnvlist_add_uint64(token_nv, "object", val); } if (zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) { fnvlist_add_uint64(token_nv, "offset", val); } if (zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) { fnvlist_add_uint64(token_nv, "bytes", val); } if (zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) { fnvlist_add_uint64(token_nv, "toguid", val); } char buf[MAXNAMELEN]; if (zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) { fnvlist_add_string(token_nv, "toname", buf); } if (zap_contains(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_LARGEBLOCK) == 0) { fnvlist_add_boolean(token_nv, "largeblockok"); } if (zap_contains(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_EMBEDOK) == 0) { fnvlist_add_boolean(token_nv, "embedok"); } if (zap_contains(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_COMPRESSOK) == 0) { fnvlist_add_boolean(token_nv, "compressok"); } if (zap_contains(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_RAWOK) == 0) { fnvlist_add_boolean(token_nv, "rawok"); } if (dsl_dataset_feature_is_active(ds, SPA_FEATURE_REDACTED_DATASETS)) { uint64_t num_redact_snaps = 0; uint64_t *redact_snaps = NULL; VERIFY3B(dsl_dataset_get_uint64_array_feature(ds, SPA_FEATURE_REDACTED_DATASETS, &num_redact_snaps, &redact_snaps), ==, B_TRUE); fnvlist_add_uint64_array(token_nv, "redact_snaps", redact_snaps, num_redact_snaps); } if (zap_contains(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS) == 0) { uint64_t num_redact_snaps = 0, int_size = 0; uint64_t *redact_snaps = NULL; VERIFY0(zap_length(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, &int_size, &num_redact_snaps)); ASSERT3U(int_size, ==, sizeof (uint64_t)); redact_snaps = kmem_alloc(int_size * num_redact_snaps, KM_SLEEP); VERIFY0(zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, int_size, num_redact_snaps, redact_snaps)); fnvlist_add_uint64_array(token_nv, "book_redact_snaps", redact_snaps, num_redact_snaps); kmem_free(redact_snaps, int_size * num_redact_snaps); } packed = fnvlist_pack(token_nv, &packed_size); fnvlist_free(token_nv); compressed = kmem_alloc(packed_size, KM_SLEEP); compressed_size = gzip_compress(packed, compressed, packed_size, packed_size, 6); zio_cksum_t cksum; fletcher_4_native_varsize(compressed, compressed_size, &cksum); size_t alloc_size = compressed_size * 2 + 1; str = kmem_alloc(alloc_size, KM_SLEEP); for (int i = 0; i < compressed_size; i++) { size_t offset = i * 2; (void) snprintf(str + offset, alloc_size - offset, "%02x", compressed[i]); } str[compressed_size * 2] = '\0'; char *propval = kmem_asprintf("%u-%llx-%llx-%s", ZFS_SEND_RESUME_TOKEN_VERSION, (longlong_t)cksum.zc_word[0], (longlong_t)packed_size, str); kmem_free(packed, packed_size); kmem_free(str, alloc_size); kmem_free(compressed, packed_size); return (propval); } /* * Returns a string that represents the receive resume state token. It should * be freed with strfree(). NULL is returned if no resume state is present. */ char * get_receive_resume_token(dsl_dataset_t *ds) { /* * A failed "newfs" (e.g. full) resumable receive leaves * the stats set on this dataset. Check here for the prop. */ char *token = get_receive_resume_token_impl(ds); if (token != NULL) return (token); /* * A failed incremental resumable receive leaves the * stats set on our child named "%recv". Check the child * for the prop. */ /* 6 extra bytes for /%recv */ char name[ZFS_MAX_DATASET_NAME_LEN + 6]; dsl_dataset_t *recv_ds; dsl_dataset_name(ds, name); if (strlcat(name, "/", sizeof (name)) < sizeof (name) && strlcat(name, recv_clone_name, sizeof (name)) < sizeof (name) && dsl_dataset_hold(ds->ds_dir->dd_pool, name, FTAG, &recv_ds) == 0) { token = get_receive_resume_token_impl(recv_ds); dsl_dataset_rele(recv_ds, FTAG); } return (token); } uint64_t dsl_get_refratio(dsl_dataset_t *ds) { uint64_t ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 : (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 / dsl_dataset_phys(ds)->ds_compressed_bytes); return (ratio); } uint64_t dsl_get_logicalreferenced(dsl_dataset_t *ds) { return (dsl_dataset_phys(ds)->ds_uncompressed_bytes); } uint64_t dsl_get_compressratio(dsl_dataset_t *ds) { if (ds->ds_is_snapshot) { return (dsl_get_refratio(ds)); } else { dsl_dir_t *dd = ds->ds_dir; mutex_enter(&dd->dd_lock); uint64_t val = dsl_dir_get_compressratio(dd); mutex_exit(&dd->dd_lock); return (val); } } uint64_t dsl_get_used(dsl_dataset_t *ds) { if (ds->ds_is_snapshot) { return (dsl_dataset_phys(ds)->ds_unique_bytes); } else { dsl_dir_t *dd = ds->ds_dir; mutex_enter(&dd->dd_lock); uint64_t val = dsl_dir_get_used(dd); mutex_exit(&dd->dd_lock); return (val); } } uint64_t dsl_get_creation(dsl_dataset_t *ds) { return (dsl_dataset_phys(ds)->ds_creation_time); } uint64_t dsl_get_creationtxg(dsl_dataset_t *ds) { return (dsl_dataset_phys(ds)->ds_creation_txg); } uint64_t dsl_get_refquota(dsl_dataset_t *ds) { return (ds->ds_quota); } uint64_t dsl_get_refreservation(dsl_dataset_t *ds) { return (ds->ds_reserved); } uint64_t dsl_get_guid(dsl_dataset_t *ds) { return (dsl_dataset_phys(ds)->ds_guid); } uint64_t dsl_get_unique(dsl_dataset_t *ds) { return (dsl_dataset_phys(ds)->ds_unique_bytes); } uint64_t dsl_get_objsetid(dsl_dataset_t *ds) { return (ds->ds_object); } uint64_t dsl_get_userrefs(dsl_dataset_t *ds) { return (ds->ds_userrefs); } uint64_t dsl_get_defer_destroy(dsl_dataset_t *ds) { return (DS_IS_DEFER_DESTROY(ds) ? 1 : 0); } uint64_t dsl_get_referenced(dsl_dataset_t *ds) { return (dsl_dataset_phys(ds)->ds_referenced_bytes); } uint64_t dsl_get_numclones(dsl_dataset_t *ds) { ASSERT(ds->ds_is_snapshot); return (dsl_dataset_phys(ds)->ds_num_children - 1); } uint64_t dsl_get_inconsistent(dsl_dataset_t *ds) { return ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT) ? 1 : 0); } uint64_t dsl_get_redacted(dsl_dataset_t *ds) { return (dsl_dataset_feature_is_active(ds, SPA_FEATURE_REDACTED_DATASETS)); } uint64_t dsl_get_available(dsl_dataset_t *ds) { uint64_t refdbytes = dsl_get_referenced(ds); uint64_t availbytes = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { availbytes += ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes; } if (ds->ds_quota != 0) { /* * Adjust available bytes according to refquota */ if (refdbytes < ds->ds_quota) { availbytes = MIN(availbytes, ds->ds_quota - refdbytes); } else { availbytes = 0; } } return (availbytes); } int dsl_get_written(dsl_dataset_t *ds, uint64_t *written) { dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_dataset_t *prev; int err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err == 0) { uint64_t comp, uncomp; err = dsl_dataset_space_written(prev, ds, written, &comp, &uncomp); dsl_dataset_rele(prev, FTAG); } return (err); } /* * 'snap' should be a buffer of size ZFS_MAX_DATASET_NAME_LEN. */ int dsl_get_prev_snap(dsl_dataset_t *ds, char *snap) { dsl_pool_t *dp = ds->ds_dir->dd_pool; if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { dsl_dataset_name(ds->ds_prev, snap); return (0); } else { return (SET_ERROR(ENOENT)); } } void dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval) { uint64_t nsnaps; uint64_t *snaps; if (dsl_dataset_get_uint64_array_feature(ds, SPA_FEATURE_REDACTED_DATASETS, &nsnaps, &snaps)) { fnvlist_add_uint64_array(propval, ZPROP_VALUE, snaps, nsnaps); } } /* * Returns the mountpoint property and source for the given dataset in the value * and source buffers. The value buffer must be at least as large as MAXPATHLEN * and the source buffer as least as large a ZFS_MAX_DATASET_NAME_LEN. * Returns 0 on success and an error on failure. */ int dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value, char *source) { int error; dsl_pool_t *dp = ds->ds_dir->dd_pool; /* Retrieve the mountpoint value stored in the zap object */ error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), 1, ZAP_MAXVALUELEN, value, source); if (error != 0) { return (error); } /* * Process the dsname and source to find the full mountpoint string. * Can be skipped for 'legacy' or 'none'. */ if (value[0] == '/') { char *buf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); char *root = buf; const char *relpath; /* * If we inherit the mountpoint, even from a dataset * with a received value, the source will be the path of * the dataset we inherit from. If source is * ZPROP_SOURCE_VAL_RECVD, the received value is not * inherited. */ if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { relpath = ""; } else { ASSERT0(strncmp(dsname, source, strlen(source))); relpath = dsname + strlen(source); if (relpath[0] == '/') relpath++; } spa_altroot(dp->dp_spa, root, ZAP_MAXVALUELEN); /* * Special case an alternate root of '/'. This will * avoid having multiple leading slashes in the * mountpoint path. */ if (strcmp(root, "/") == 0) root++; /* * If the mountpoint is '/' then skip over this * if we are obtaining either an alternate root or * an inherited mountpoint. */ char *mnt = value; if (value[1] == '\0' && (root[0] != '\0' || relpath[0] != '\0')) mnt = value + 1; if (relpath[0] == '\0') { (void) snprintf(value, ZAP_MAXVALUELEN, "%s%s", root, mnt); } else { (void) snprintf(value, ZAP_MAXVALUELEN, "%s%s%s%s", root, mnt, relpath[0] == '@' ? "" : "/", relpath); } kmem_free(buf, ZAP_MAXVALUELEN); } return (0); } void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool; ASSERT(dsl_pool_config_held(dp)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, dsl_get_refratio(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, dsl_get_logicalreferenced(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, dsl_get_compressratio(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dsl_get_used(ds)); if (ds->ds_is_snapshot) { get_clones_stat(ds, nv); } else { char buf[ZFS_MAX_DATASET_NAME_LEN]; if (dsl_get_prev_snap(ds, buf) == 0) dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); dsl_dir_stats(ds->ds_dir, nv); } nvlist_t *propval = fnvlist_alloc(); dsl_get_redact_snaps(ds, propval); fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), propval); nvlist_free(propval); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, dsl_get_available(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, dsl_get_referenced(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, dsl_get_creation(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, dsl_get_creationtxg(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, dsl_get_refquota(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, dsl_get_refreservation(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, dsl_get_guid(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, dsl_get_unique(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, dsl_get_objsetid(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, dsl_get_userrefs(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, dsl_get_defer_destroy(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOTS_CHANGED, dsl_dir_snap_cmtime(ds->ds_dir).tv_sec); dsl_dataset_crypt_stats(ds, nv); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { uint64_t written; if (dsl_get_written(ds, &written) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, written); } } if (!dsl_dataset_is_snapshot(ds)) { char *token = get_receive_resume_token(ds); if (token != NULL) { dsl_prop_nvlist_add_string(nv, ZFS_PROP_RECEIVE_RESUME_TOKEN, token); kmem_strfree(token); } } } void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) { dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool; ASSERT(dsl_pool_config_held(dp)); stat->dds_creation_txg = dsl_get_creationtxg(ds); stat->dds_inconsistent = dsl_get_inconsistent(ds); stat->dds_guid = dsl_get_guid(ds); stat->dds_redacted = dsl_get_redacted(ds); stat->dds_origin[0] = '\0'; if (ds->ds_is_snapshot) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = dsl_get_numclones(ds); } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; if (dsl_dir_is_clone(ds->ds_dir)) { dsl_dir_get_origin(ds->ds_dir, stat->dds_origin); } } } uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds) { return (ds->ds_fsid_guid); } void dsl_dataset_space(dsl_dataset_t *ds, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp) { *refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes; *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) *availbytesp += ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes; if (ds->ds_quota != 0) { /* * Adjust available bytes according to refquota */ if (*refdbytesp < ds->ds_quota) *availbytesp = MIN(*availbytesp, ds->ds_quota - *refdbytesp); else *availbytesp = 0; } rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp); rrw_exit(&ds->ds_bp_rwlock, FTAG); *availobjsp = DN_MAX_OBJECT - *usedobjsp; } boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) { dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool; uint64_t birth; ASSERT(dsl_pool_config_held(dp)); if (snap == NULL) return (B_FALSE); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); birth = dsl_dataset_get_blkptr(ds)->blk_birth; rrw_exit(&ds->ds_bp_rwlock, FTAG); if (birth > dsl_dataset_phys(snap)->ds_creation_txg) { objset_t *os, *os_snap; /* * It may be that only the ZIL differs, because it was * reset in the head. Don't count that as being * modified. */ if (dmu_objset_from_ds(ds, &os) != 0) return (B_TRUE); if (dmu_objset_from_ds(snap, &os_snap) != 0) return (B_TRUE); return (memcmp(&os->os_phys->os_meta_dnode, &os_snap->os_phys->os_meta_dnode, sizeof (os->os_phys->os_meta_dnode)) != 0); } return (B_FALSE); } typedef struct dsl_dataset_rename_snapshot_arg { const char *ddrsa_fsname; const char *ddrsa_oldsnapname; const char *ddrsa_newsnapname; boolean_t ddrsa_recursive; dmu_tx_t *ddrsa_tx; } dsl_dataset_rename_snapshot_arg_t; static int dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { (void) dp; dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; int error; uint64_t val; error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); if (error != 0) { /* ignore nonexistent snapshots */ return (error == ENOENT ? 0 : error); } /* new name should not exist */ error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); if (error == 0) error = SET_ERROR(EEXIST); else if (error == ENOENT) error = 0; /* dataset name + 1 for the "@" + the new snapshot name must fit */ if (dsl_dir_namelen(hds->ds_dir) + 1 + strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN) error = SET_ERROR(ENAMETOOLONG); return (error); } static int dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) { dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *hds; int error; error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); if (error != 0) return (error); if (ddrsa->ddrsa_recursive) { error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, dsl_dataset_rename_snapshot_check_impl, ddrsa, DS_FIND_CHILDREN); } else { error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); } dsl_dataset_rele(hds, FTAG); return (error); } static int dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; dsl_dataset_t *ds; uint64_t val; dmu_tx_t *tx = ddrsa->ddrsa_tx; int error; error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); ASSERT(error == 0 || error == ENOENT); if (error == ENOENT) { /* ignore nonexistent snapshots */ return (0); } VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); /* log before we change the name */ spa_history_log_internal_ds(ds, "rename", tx, "-> @%s", ddrsa->ddrsa_newsnapname); VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx, B_FALSE)); mutex_enter(&ds->ds_lock); (void) strlcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname, sizeof (ds->ds_snapname)); mutex_exit(&ds->ds_lock); VERIFY0(zap_add(dp->dp_meta_objset, dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); zvol_rename_minors(dp->dp_spa, ddrsa->ddrsa_oldsnapname, ddrsa->ddrsa_newsnapname, B_TRUE); dsl_dataset_rele(ds, FTAG); return (0); } static void dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *hds = NULL; VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); ddrsa->ddrsa_tx = tx; if (ddrsa->ddrsa_recursive) { VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, dsl_dataset_rename_snapshot_sync_impl, ddrsa, DS_FIND_CHILDREN)); } else { VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); } dsl_dataset_rele(hds, FTAG); } int dsl_dataset_rename_snapshot(const char *fsname, const char *oldsnapname, const char *newsnapname, boolean_t recursive) { dsl_dataset_rename_snapshot_arg_t ddrsa; ddrsa.ddrsa_fsname = fsname; ddrsa.ddrsa_oldsnapname = oldsnapname; ddrsa.ddrsa_newsnapname = newsnapname; ddrsa.ddrsa_recursive = recursive; return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, dsl_dataset_rename_snapshot_sync, &ddrsa, 1, ZFS_SPACE_CHECK_RESERVED)); } /* * If we're doing an ownership handoff, we need to make sure that there is * only one long hold on the dataset. We're not allowed to change anything here * so we don't permanently release the long hold or regular hold here. We want * to do this only when syncing to avoid the dataset unexpectedly going away * when we release the long hold. */ static int dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) { boolean_t held = B_FALSE; if (!dmu_tx_is_syncing(tx)) return (0); dsl_dir_t *dd = ds->ds_dir; mutex_enter(&dd->dd_activity_lock); uint64_t holds = zfs_refcount_count(&ds->ds_longholds) - (owner != NULL ? 1 : 0); /* * The value of dd_activity_waiters can chance as soon as we drop the * lock, but we're fine with that; new waiters coming in or old * waiters leaving doesn't cause problems, since we're going to cancel * waiters later anyway. The goal of this check is to verify that no * non-waiters have long-holds, and all new long-holds will be * prevented because we're holding the pool config as writer. */ if (holds != dd->dd_activity_waiters) held = B_TRUE; mutex_exit(&dd->dd_activity_lock); if (held) return (SET_ERROR(EBUSY)); return (0); } int dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) { dsl_dataset_rollback_arg_t *ddra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int64_t unused_refres_delta; int error; error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); if (error != 0) return (error); /* must not be a snapshot */ if (ds->ds_is_snapshot) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } /* must have a most recent snapshot */ if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ESRCH)); } /* * No rollback to a snapshot created in the current txg, because * the rollback may dirty the dataset and create blocks that are * not reachable from the rootbp while having a birth txg that * falls into the snapshot's range. */ if (dmu_tx_is_syncing(tx) && dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EAGAIN)); } /* * If the expected target snapshot is specified, then check that * the latest snapshot is it. */ if (ddra->ddra_tosnap != NULL) { dsl_dataset_t *snapds; /* Check if the target snapshot exists at all. */ error = dsl_dataset_hold(dp, ddra->ddra_tosnap, FTAG, &snapds); if (error != 0) { /* * ESRCH is used to signal that the target snapshot does * not exist, while ENOENT is used to report that * the rolled back dataset does not exist. * ESRCH is also used to cover other cases where the * target snapshot is not related to the dataset being * rolled back such as being in a different pool. */ if (error == ENOENT || error == EXDEV) error = SET_ERROR(ESRCH); dsl_dataset_rele(ds, FTAG); return (error); } ASSERT(snapds->ds_is_snapshot); /* Check if the snapshot is the latest snapshot indeed. */ if (snapds != ds->ds_prev) { /* * Distinguish between the case where the only problem * is intervening snapshots (EEXIST) vs the snapshot * not being a valid target for rollback (ESRCH). */ if (snapds->ds_dir == ds->ds_dir || (dsl_dir_is_clone(ds->ds_dir) && dsl_dir_phys(ds->ds_dir)->dd_origin_obj == snapds->ds_object)) { error = SET_ERROR(EEXIST); } else { error = SET_ERROR(ESRCH); } dsl_dataset_rele(snapds, FTAG); dsl_dataset_rele(ds, FTAG); return (error); } dsl_dataset_rele(snapds, FTAG); } /* must not have any bookmarks after the most recent snapshot */ if (dsl_bookmark_latest_txg(ds) > dsl_dataset_phys(ds)->ds_prev_snap_txg) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EEXIST)); } error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } /* * Check if the snap we are rolling back to uses more than * the refquota. */ if (ds->ds_quota != 0 && dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EDQUOT)); } /* * When we do the clone swap, we will temporarily use more space * due to the refreservation (the head will no longer have any * unique space, so the entire amount of the refreservation will need * to be free). We will immediately destroy the clone, freeing * this space, but the freeing happens over many txg's. */ unused_refres_delta = (int64_t)MIN(ds->ds_reserved, dsl_dataset_phys(ds)->ds_unique_bytes); if (unused_refres_delta > 0 && unused_refres_delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENOSPC)); } dsl_dataset_rele(ds, FTAG); return (0); } void dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_rollback_arg_t *ddra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds, *clone; uint64_t cloneobj; char namebuf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); dsl_dataset_name(ds->ds_prev, namebuf); fnvlist_add_string(ddra->ddra_result, "target", namebuf); cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); dsl_dataset_clone_swap_sync_impl(clone, ds, tx); dsl_dataset_zero_zil(ds, tx); dsl_destroy_head_sync_impl(clone, tx); dsl_dataset_rele(clone, FTAG); dsl_dataset_rele(ds, FTAG); } /* * Rolls back the given filesystem or volume to the most recent snapshot. * The name of the most recent snapshot will be returned under key "target" * in the result nvlist. * * If owner != NULL: * - The existing dataset MUST be owned by the specified owner at entry * - Upon return, dataset will still be held by the same owner, whether we * succeed or not. * * This mode is required any time the existing filesystem is mounted. See * notes above zfs_suspend_fs() for further details. */ int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner, nvlist_t *result) { dsl_dataset_rollback_arg_t ddra; ddra.ddra_fsname = fsname; ddra.ddra_tosnap = tosnap; ddra.ddra_owner = owner; ddra.ddra_result = result; return (dsl_sync_task(fsname, dsl_dataset_rollback_check, dsl_dataset_rollback_sync, &ddra, 1, ZFS_SPACE_CHECK_RESERVED)); } struct promotenode { list_node_t link; dsl_dataset_t *ds; }; static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, const void *tag); static void promote_rele(dsl_dataset_promote_arg_t *ddpa, const void *tag); int dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) { dsl_dataset_promote_arg_t *ddpa = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *hds; struct promotenode *snap; dsl_dataset_t *origin_ds, *origin_head; int err; uint64_t unused; uint64_t ss_mv_cnt; size_t max_snap_len; boolean_t conflicting_snaps; err = promote_hold(ddpa, dp, FTAG); if (err != 0) return (err); hds = ddpa->ddpa_clone; max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1; if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) { promote_rele(ddpa, FTAG); return (SET_ERROR(EXDEV)); } snap = list_head(&ddpa->shared_snaps); origin_head = snap->ds; if (snap == NULL) { err = SET_ERROR(ENOENT); goto out; } origin_ds = snap->ds; /* * Encrypted clones share a DSL Crypto Key with their origin's dsl dir. * When doing a promote we must make sure the encryption root for * both the target and the target's origin does not change to avoid * needing to rewrap encryption keys */ err = dsl_dataset_promote_crypt_check(hds->ds_dir, origin_ds->ds_dir); if (err != 0) goto out; /* * Compute and check the amount of space to transfer. Since this is * so expensive, don't do the preliminary check. */ if (!dmu_tx_is_syncing(tx)) { promote_rele(ddpa, FTAG); return (0); } /* compute origin's new unique space */ snap = list_tail(&ddpa->clone_snaps); ASSERT(snap != NULL); ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, origin_ds->ds_object); dsl_deadlist_space_range(&snap->ds->ds_deadlist, dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX, &ddpa->unique, &unused, &unused); /* * Walk the snapshots that we are moving * * Compute space to transfer. Consider the incremental changes * to used by each snapshot: * (my used) = (prev's used) + (blocks born) - (blocks killed) * So each snapshot gave birth to: * (blocks born) = (my used) - (prev's used) + (blocks killed) * So a sequence would look like: * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) * Which simplifies to: * uN + kN + kN-1 + ... + k1 + k0 * Note however, if we stop before we reach the ORIGIN we get: * uN + kN + kN-1 + ... + kM - uM-1 */ conflicting_snaps = B_FALSE; ss_mv_cnt = 0; ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes; ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes; ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes; for (snap = list_head(&ddpa->shared_snaps); snap; snap = list_next(&ddpa->shared_snaps, snap)) { uint64_t val, dlused, dlcomp, dluncomp; dsl_dataset_t *ds = snap->ds; ss_mv_cnt++; /* * If there are long holds, we won't be able to evict * the objset. */ if (dsl_dataset_long_held(ds)) { err = SET_ERROR(EBUSY); goto out; } /* Check that the snapshot name does not conflict */ VERIFY0(dsl_dataset_get_snapname(ds)); if (strlen(ds->ds_snapname) >= max_snap_len) { err = SET_ERROR(ENAMETOOLONG); goto out; } err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); if (err == 0) { fnvlist_add_boolean(ddpa->err_ds, snap->ds->ds_snapname); conflicting_snaps = B_TRUE; } else if (err != ENOENT) { goto out; } /* The very first snapshot does not have a deadlist */ if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0) continue; dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); ddpa->used += dlused; ddpa->comp += dlcomp; ddpa->uncomp += dluncomp; } /* * Check that bookmarks that are being transferred don't have * name conflicts. */ for (dsl_bookmark_node_t *dbn = avl_first(&origin_head->ds_bookmarks); dbn != NULL && dbn->dbn_phys.zbm_creation_txg <= dsl_dataset_phys(origin_ds)->ds_creation_txg; dbn = AVL_NEXT(&origin_head->ds_bookmarks, dbn)) { if (strlen(dbn->dbn_name) >= max_snap_len) { err = SET_ERROR(ENAMETOOLONG); goto out; } zfs_bookmark_phys_t bm; err = dsl_bookmark_lookup_impl(ddpa->ddpa_clone, dbn->dbn_name, &bm); if (err == 0) { fnvlist_add_boolean(ddpa->err_ds, dbn->dbn_name); conflicting_snaps = B_TRUE; } else if (err == ESRCH) { err = 0; } else if (err != 0) { goto out; } } /* * In order to return the full list of conflicting snapshots, we check * whether there was a conflict after traversing all of them. */ if (conflicting_snaps) { err = SET_ERROR(EEXIST); goto out; } /* * If we are a clone of a clone then we never reached ORIGIN, * so we need to subtract out the clone origin's used space. */ if (ddpa->origin_origin) { ddpa->used -= dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes; ddpa->comp -= dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes; ddpa->uncomp -= dsl_dataset_phys(ddpa->origin_origin)-> ds_uncompressed_bytes; } /* Check that there is enough space and limit headroom here */ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 0, ss_mv_cnt, ddpa->used, ddpa->cr, ddpa->proc); if (err != 0) goto out; /* * Compute the amounts of space that will be used by snapshots * after the promotion (for both origin and clone). For each, * it is the amount of space that will be on all of their * deadlists (that was not born before their new origin). */ if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) { uint64_t space; /* * Note, typically this will not be a clone of a clone, * so dd_origin_txg will be < TXG_INITIAL, so * these snaplist_space() -> dsl_deadlist_space_range() * calls will be fast because they do not have to * iterate over all bps. */ snap = list_head(&ddpa->origin_snaps); if (snap == NULL) { err = SET_ERROR(ENOENT); goto out; } err = snaplist_space(&ddpa->shared_snaps, snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); if (err != 0) goto out; err = snaplist_space(&ddpa->clone_snaps, snap->ds->ds_dir->dd_origin_txg, &space); if (err != 0) goto out; ddpa->cloneusedsnap += space; } if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) { err = snaplist_space(&ddpa->origin_snaps, dsl_dataset_phys(origin_ds)->ds_creation_txg, &ddpa->originusedsnap); if (err != 0) goto out; } out: promote_rele(ddpa, FTAG); return (err); } void dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_promote_arg_t *ddpa = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *hds; struct promotenode *snap; dsl_dataset_t *origin_ds; dsl_dataset_t *origin_head; dsl_dir_t *dd; dsl_dir_t *odd = NULL; uint64_t oldnext_obj; int64_t delta; ASSERT(nvlist_empty(ddpa->err_ds)); VERIFY0(promote_hold(ddpa, dp, FTAG)); hds = ddpa->ddpa_clone; ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE); snap = list_head(&ddpa->shared_snaps); origin_ds = snap->ds; dd = hds->ds_dir; snap = list_head(&ddpa->origin_snaps); origin_head = snap->ds; /* * We need to explicitly open odd, since origin_ds's dd will be * changing. */ VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, NULL, FTAG, &odd)); dsl_dataset_promote_crypt_sync(hds->ds_dir, odd, tx); /* change origin's next snap */ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; snap = list_tail(&ddpa->clone_snaps); ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, origin_ds->ds_object); dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object; /* change the origin's next clone */ if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) { dsl_dataset_remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dataset_phys(origin_ds)->ds_next_clones_obj, oldnext_obj, tx)); } /* change origin */ dmu_buf_will_dirty(dd->dd_dbuf, tx); ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object); dsl_dir_phys(dd)->dd_origin_obj = dsl_dir_phys(odd)->dd_origin_obj; dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; dmu_buf_will_dirty(odd->dd_dbuf, tx); dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object; origin_head->ds_dir->dd_origin_txg = dsl_dataset_phys(origin_ds)->ds_creation_txg; /* change dd_clone entries */ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { VERIFY0(zap_remove_int(dp->dp_meta_objset, dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx)); VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, hds->ds_object, tx)); VERIFY0(zap_remove_int(dp->dp_meta_objset, dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, origin_head->ds_object, tx)); if (dsl_dir_phys(dd)->dd_clones == 0) { dsl_dir_phys(dd)->dd_clones = zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx)); } /* * Move bookmarks to this dir. */ dsl_bookmark_node_t *dbn_next; for (dsl_bookmark_node_t *dbn = avl_first(&origin_head->ds_bookmarks); dbn != NULL && dbn->dbn_phys.zbm_creation_txg <= dsl_dataset_phys(origin_ds)->ds_creation_txg; dbn = dbn_next) { dbn_next = AVL_NEXT(&origin_head->ds_bookmarks, dbn); avl_remove(&origin_head->ds_bookmarks, dbn); VERIFY0(zap_remove(dp->dp_meta_objset, origin_head->ds_bookmarks_obj, dbn->dbn_name, tx)); dsl_bookmark_node_add(hds, dbn, tx); } dsl_bookmark_next_changed(hds, origin_ds, tx); /* move snapshots to this dir */ for (snap = list_head(&ddpa->shared_snaps); snap; snap = list_next(&ddpa->shared_snaps, snap)) { dsl_dataset_t *ds = snap->ds; /* * Property callbacks are registered to a particular * dsl_dir. Since ours is changing, evict the objset * so that they will be unregistered from the old dsl_dir. */ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } /* move snap name entry */ VERIFY0(dsl_dataset_get_snapname(ds)); VERIFY0(dsl_dataset_snap_remove(origin_head, ds->ds_snapname, tx, B_TRUE)); VERIFY0(zap_add(dp->dp_meta_objset, dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); dsl_fs_ss_count_adjust(hds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); /* change containing dsl_dir */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object); dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object; ASSERT3P(ds->ds_dir, ==, odd); dsl_dir_rele(ds->ds_dir, ds); VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); /* move any clone references */ if (dsl_dataset_phys(ds)->ds_next_clones_obj && spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *cnds; uint64_t o; if (za.za_first_integer == oldnext_obj) { /* * We've already moved the * origin's reference. */ continue; } VERIFY0(dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &cnds)); o = dsl_dir_phys(cnds->ds_dir)-> dd_head_dataset_obj; VERIFY0(zap_remove_int(dp->dp_meta_objset, dsl_dir_phys(odd)->dd_clones, o, tx)); VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dir_phys(dd)->dd_clones, o, tx)); dsl_dataset_rele(cnds, FTAG); } zap_cursor_fini(&zc); } ASSERT(!dsl_prop_hascb(ds)); } /* * Change space accounting. * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either * both be valid, or both be 0 (resulting in delta == 0). This * is true for each of {clone,origin} independently. */ delta = ddpa->cloneusedsnap - dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]; ASSERT3S(delta, >=, 0); ASSERT3U(ddpa->used, >=, delta); dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); dsl_dir_diduse_space(dd, DD_USED_HEAD, ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); delta = ddpa->originusedsnap - dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP]; ASSERT3S(delta, <=, 0); ASSERT3U(ddpa->used, >=, -delta); dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); dsl_dir_diduse_space(odd, DD_USED_HEAD, -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique; /* * Since livelists are specific to a clone's origin txg, they * are no longer accurate. Destroy the livelist from the clone being * promoted. If the origin dataset is a clone, destroy its livelist * as well. */ dsl_dir_remove_livelist(dd, tx, B_TRUE); dsl_dir_remove_livelist(odd, tx, B_TRUE); /* log history record */ spa_history_log_internal_ds(hds, "promote", tx, " "); dsl_dir_rele(odd, FTAG); promote_rele(ddpa, FTAG); /* * Transfer common error blocks from old head to new head. */ if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG)) { uint64_t old_head = origin_head->ds_object; uint64_t new_head = hds->ds_object; spa_swap_errlog(dp->dp_spa, new_head, old_head, tx); } } /* * Make a list of dsl_dataset_t's for the snapshots between first_obj * (exclusive) and last_obj (inclusive). The list will be in reverse * order (last_obj will be the list_head()). If first_obj == 0, do all * snapshots back to this dataset's origin. */ static int snaplist_make(dsl_pool_t *dp, uint64_t first_obj, uint64_t last_obj, list_t *l, const void *tag) { uint64_t obj = last_obj; list_create(l, sizeof (struct promotenode), offsetof(struct promotenode, link)); while (obj != first_obj) { dsl_dataset_t *ds; struct promotenode *snap; int err; err = dsl_dataset_hold_obj(dp, obj, tag, &ds); ASSERT(err != ENOENT); if (err != 0) return (err); if (first_obj == 0) first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj; snap = kmem_alloc(sizeof (*snap), KM_SLEEP); snap->ds = ds; list_insert_tail(l, snap); obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; } return (0); } static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) { struct promotenode *snap; *spacep = 0; for (snap = list_head(l); snap; snap = list_next(l, snap)) { uint64_t used, comp, uncomp; dsl_deadlist_space_range(&snap->ds->ds_deadlist, mintxg, UINT64_MAX, &used, &comp, &uncomp); *spacep += used; } return (0); } static void snaplist_destroy(list_t *l, const void *tag) { struct promotenode *snap; if (l == NULL || !list_link_active(&l->list_head)) return; while ((snap = list_tail(l)) != NULL) { list_remove(l, snap); dsl_dataset_rele(snap->ds, tag); kmem_free(snap, sizeof (*snap)); } list_destroy(l); } static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, const void *tag) { int error; dsl_dir_t *dd; struct promotenode *snap; error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, &ddpa->ddpa_clone); if (error != 0) return (error); dd = ddpa->ddpa_clone->ds_dir; if (ddpa->ddpa_clone->ds_is_snapshot || !dsl_dir_is_clone(dd)) { dsl_dataset_rele(ddpa->ddpa_clone, tag); return (SET_ERROR(EINVAL)); } error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj, &ddpa->shared_snaps, tag); if (error != 0) goto out; error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, &ddpa->clone_snaps, tag); if (error != 0) goto out; snap = list_head(&ddpa->shared_snaps); ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj); error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj, dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj, &ddpa->origin_snaps, tag); if (error != 0) goto out; if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) { error = dsl_dataset_hold_obj(dp, dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj, tag, &ddpa->origin_origin); if (error != 0) goto out; } out: if (error != 0) promote_rele(ddpa, tag); return (error); } static void promote_rele(dsl_dataset_promote_arg_t *ddpa, const void *tag) { snaplist_destroy(&ddpa->shared_snaps, tag); snaplist_destroy(&ddpa->clone_snaps, tag); snaplist_destroy(&ddpa->origin_snaps, tag); if (ddpa->origin_origin != NULL) dsl_dataset_rele(ddpa->origin_origin, tag); dsl_dataset_rele(ddpa->ddpa_clone, tag); } /* * Promote a clone. * * If it fails due to a conflicting snapshot name, "conflsnap" will be filled * in with the name. (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.) */ int dsl_dataset_promote(const char *name, char *conflsnap) { dsl_dataset_promote_arg_t ddpa = { 0 }; uint64_t numsnaps; int error; nvpair_t *snap_pair; objset_t *os; /* * We will modify space proportional to the number of * snapshots. Compute numsnaps. */ error = dmu_objset_hold(name, FTAG, &os); if (error != 0) return (error); error = zap_count(dmu_objset_pool(os)->dp_meta_objset, dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj, &numsnaps); dmu_objset_rele(os, FTAG); if (error != 0) return (error); ddpa.ddpa_clonename = name; ddpa.err_ds = fnvlist_alloc(); ddpa.cr = CRED(); ddpa.proc = curproc; error = dsl_sync_task(name, dsl_dataset_promote_check, dsl_dataset_promote_sync, &ddpa, 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED); /* * Return the first conflicting snapshot found. */ snap_pair = nvlist_next_nvpair(ddpa.err_ds, NULL); if (snap_pair != NULL && conflsnap != NULL) (void) strlcpy(conflsnap, nvpair_name(snap_pair), ZFS_MAX_DATASET_NAME_LEN); fnvlist_free(ddpa.err_ds); return (error); } int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) { /* * "slack" factor for received datasets with refquota set on them. * See the bottom of this function for details on its use. */ uint64_t refquota_slack = (uint64_t)DMU_MAX_ACCESS * spa_asize_inflation; int64_t unused_refres_delta; /* they should both be heads */ if (clone->ds_is_snapshot || origin_head->ds_is_snapshot) return (SET_ERROR(EINVAL)); /* if we are not forcing, the branch point should be just before them */ if (!force && clone->ds_prev != origin_head->ds_prev) return (SET_ERROR(EINVAL)); /* clone should be the clone (unless they are unrelated) */ if (clone->ds_prev != NULL && clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && origin_head->ds_dir != clone->ds_prev->ds_dir) return (SET_ERROR(EINVAL)); /* the clone should be a child of the origin */ if (clone->ds_dir->dd_parent != origin_head->ds_dir) return (SET_ERROR(EINVAL)); /* origin_head shouldn't be modified unless 'force' */ if (!force && dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) return (SET_ERROR(ETXTBSY)); /* origin_head should have no long holds (e.g. is not mounted) */ if (dsl_dataset_handoff_check(origin_head, owner, tx)) return (SET_ERROR(EBUSY)); /* check amount of any unconsumed refreservation */ unused_refres_delta = (int64_t)MIN(origin_head->ds_reserved, dsl_dataset_phys(origin_head)->ds_unique_bytes) - (int64_t)MIN(origin_head->ds_reserved, dsl_dataset_phys(clone)->ds_unique_bytes); if (unused_refres_delta > 0 && unused_refres_delta > dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) return (SET_ERROR(ENOSPC)); /* * The clone can't be too much over the head's refquota. * * To ensure that the entire refquota can be used, we allow one * transaction to exceed the refquota. Therefore, this check * needs to also allow for the space referenced to be more than the * refquota. The maximum amount of space that one transaction can use * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this * overage ensures that we are able to receive a filesystem that * exceeds the refquota on the source system. * * So that overage is the refquota_slack we use below. */ if (origin_head->ds_quota != 0 && dsl_dataset_phys(clone)->ds_referenced_bytes > origin_head->ds_quota + refquota_slack) return (SET_ERROR(EDQUOT)); return (0); } static void dsl_dataset_swap_remap_deadlists(dsl_dataset_t *clone, dsl_dataset_t *origin, dmu_tx_t *tx) { uint64_t clone_remap_dl_obj, origin_remap_dl_obj; dsl_pool_t *dp = dmu_tx_pool(tx); ASSERT(dsl_pool_sync_context(dp)); clone_remap_dl_obj = dsl_dataset_get_remap_deadlist_object(clone); origin_remap_dl_obj = dsl_dataset_get_remap_deadlist_object(origin); if (clone_remap_dl_obj != 0) { dsl_deadlist_close(&clone->ds_remap_deadlist); dsl_dataset_unset_remap_deadlist_object(clone, tx); } if (origin_remap_dl_obj != 0) { dsl_deadlist_close(&origin->ds_remap_deadlist); dsl_dataset_unset_remap_deadlist_object(origin, tx); } if (clone_remap_dl_obj != 0) { dsl_dataset_set_remap_deadlist_object(origin, clone_remap_dl_obj, tx); dsl_deadlist_open(&origin->ds_remap_deadlist, dp->dp_meta_objset, clone_remap_dl_obj); } if (origin_remap_dl_obj != 0) { dsl_dataset_set_remap_deadlist_object(clone, origin_remap_dl_obj, tx); dsl_deadlist_open(&clone->ds_remap_deadlist, dp->dp_meta_objset, origin_remap_dl_obj); } } void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, dsl_dataset_t *origin_head, dmu_tx_t *tx) { dsl_pool_t *dp = dmu_tx_pool(tx); int64_t unused_refres_delta; ASSERT(clone->ds_reserved == 0); /* * NOTE: On DEBUG kernels there could be a race between this and * the check function if spa_asize_inflation is adjusted... */ ASSERT(origin_head->ds_quota == 0 || dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota + DMU_MAX_ACCESS * spa_asize_inflation); ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); dsl_dir_cancel_waiters(origin_head->ds_dir); /* * Swap per-dataset feature flags. */ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (!(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET)) { ASSERT(!dsl_dataset_feature_is_active(clone, f)); ASSERT(!dsl_dataset_feature_is_active(origin_head, f)); continue; } boolean_t clone_inuse = dsl_dataset_feature_is_active(clone, f); void *clone_feature = clone->ds_feature[f]; boolean_t origin_head_inuse = dsl_dataset_feature_is_active(origin_head, f); void *origin_head_feature = origin_head->ds_feature[f]; if (clone_inuse) dsl_dataset_deactivate_feature_impl(clone, f, tx); if (origin_head_inuse) dsl_dataset_deactivate_feature_impl(origin_head, f, tx); if (clone_inuse) { dsl_dataset_activate_feature(origin_head->ds_object, f, clone_feature, tx); origin_head->ds_feature[f] = clone_feature; } if (origin_head_inuse) { dsl_dataset_activate_feature(clone->ds_object, f, origin_head_feature, tx); clone->ds_feature[f] = origin_head_feature; } } dmu_buf_will_dirty(clone->ds_dbuf, tx); dmu_buf_will_dirty(origin_head->ds_dbuf, tx); if (clone->ds_objset != NULL) { dmu_objset_evict(clone->ds_objset); clone->ds_objset = NULL; } if (origin_head->ds_objset != NULL) { dmu_objset_evict(origin_head->ds_objset); origin_head->ds_objset = NULL; } unused_refres_delta = (int64_t)MIN(origin_head->ds_reserved, dsl_dataset_phys(origin_head)->ds_unique_bytes) - (int64_t)MIN(origin_head->ds_reserved, dsl_dataset_phys(clone)->ds_unique_bytes); /* * Reset origin's unique bytes. */ { dsl_dataset_t *origin = clone->ds_prev; uint64_t comp, uncomp; dmu_buf_will_dirty(origin->ds_dbuf, tx); dsl_deadlist_space_range(&clone->ds_deadlist, dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX, &dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp); } /* swap blkptrs */ { rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG); rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG); blkptr_t tmp; tmp = dsl_dataset_phys(origin_head)->ds_bp; dsl_dataset_phys(origin_head)->ds_bp = dsl_dataset_phys(clone)->ds_bp; dsl_dataset_phys(clone)->ds_bp = tmp; rrw_exit(&origin_head->ds_bp_rwlock, FTAG); rrw_exit(&clone->ds_bp_rwlock, FTAG); } /* set dd_*_bytes */ { int64_t dused, dcomp, duncomp; uint64_t cdl_used, cdl_comp, cdl_uncomp; uint64_t odl_used, odl_comp, odl_uncomp; ASSERT3U(dsl_dir_phys(clone->ds_dir)-> dd_used_breakdown[DD_USED_SNAP], ==, 0); dsl_deadlist_space(&clone->ds_deadlist, &cdl_used, &cdl_comp, &cdl_uncomp); dsl_deadlist_space(&origin_head->ds_deadlist, &odl_used, &odl_comp, &odl_uncomp); dused = dsl_dataset_phys(clone)->ds_referenced_bytes + cdl_used - (dsl_dataset_phys(origin_head)->ds_referenced_bytes + odl_used); dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes + cdl_comp - (dsl_dataset_phys(origin_head)->ds_compressed_bytes + odl_comp); duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes + cdl_uncomp - (dsl_dataset_phys(origin_head)->ds_uncompressed_bytes + odl_uncomp); dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, dused, dcomp, duncomp, tx); dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, -dused, -dcomp, -duncomp, tx); /* * The difference in the space used by snapshots is the * difference in snapshot space due to the head's * deadlist (since that's the only thing that's * changing that affects the snapused). */ dsl_deadlist_space_range(&clone->ds_deadlist, origin_head->ds_dir->dd_origin_txg, UINT64_MAX, &cdl_used, &cdl_comp, &cdl_uncomp); dsl_deadlist_space_range(&origin_head->ds_deadlist, origin_head->ds_dir->dd_origin_txg, UINT64_MAX, &odl_used, &odl_comp, &odl_uncomp); dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, DD_USED_HEAD, DD_USED_SNAP, tx); } /* swap ds_*_bytes */ SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes, dsl_dataset_phys(clone)->ds_referenced_bytes); SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes, dsl_dataset_phys(clone)->ds_compressed_bytes); SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes, dsl_dataset_phys(clone)->ds_uncompressed_bytes); SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes, dsl_dataset_phys(clone)->ds_unique_bytes); /* apply any parent delta for change in unconsumed refreservation */ dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, unused_refres_delta, 0, 0, tx); /* * Swap deadlists. */ dsl_deadlist_close(&clone->ds_deadlist); dsl_deadlist_close(&origin_head->ds_deadlist); SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj, dsl_dataset_phys(clone)->ds_deadlist_obj); dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, dsl_dataset_phys(clone)->ds_deadlist_obj); dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, dsl_dataset_phys(origin_head)->ds_deadlist_obj); dsl_dataset_swap_remap_deadlists(clone, origin_head, tx); /* * If there is a bookmark at the origin, its "next dataset" is * changing, so we need to reset its FBN. */ dsl_bookmark_next_changed(origin_head, origin_head->ds_prev, tx); dsl_scan_ds_clone_swapped(origin_head, clone, tx); /* * Destroy any livelists associated with the clone or the origin, * since after the swap the corresponding livelists are no longer * valid. */ dsl_dir_remove_livelist(clone->ds_dir, tx, B_TRUE); dsl_dir_remove_livelist(origin_head->ds_dir, tx, B_TRUE); spa_history_log_internal_ds(clone, "clone swap", tx, "parent=%s", origin_head->ds_dir->dd_myname); } /* * Given a pool name and a dataset object number in that pool, * return the name of that dataset. */ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) { dsl_pool_t *dp; dsl_dataset_t *ds; int error; error = dsl_pool_hold(pname, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); if (error == 0) { dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); } dsl_pool_rele(dp, FTAG); return (error); } int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) { int error = 0; ASSERT3S(asize, >, 0); /* * *ref_rsrv is the portion of asize that will come from any * unconsumed refreservation space. */ *ref_rsrv = 0; mutex_enter(&ds->ds_lock); /* * Make a space adjustment for reserved bytes. */ if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { ASSERT3U(*used, >=, ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); *used -= (ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); *ref_rsrv = asize - MIN(asize, parent_delta(ds, asize + inflight)); } if (!check_quota || ds->ds_quota == 0) { mutex_exit(&ds->ds_lock); return (0); } /* * If they are requesting more space, and our current estimate * is over quota, they get to try again unless the actual * on-disk is over quota and there are no pending changes (which * may free up space for us). */ if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >= ds->ds_quota) { if (inflight > 0 || dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota) error = SET_ERROR(ERESTART); else error = SET_ERROR(EDQUOT); } mutex_exit(&ds->ds_lock); return (error); } typedef struct dsl_dataset_set_qr_arg { const char *ddsqra_name; zprop_source_t ddsqra_source; uint64_t ddsqra_value; } dsl_dataset_set_qr_arg_t; static int dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) { dsl_dataset_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error; uint64_t newval; if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) return (SET_ERROR(ENOTSUP)); error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); if (ds->ds_is_snapshot) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_prop_predict(ds->ds_dir, zfs_prop_to_name(ZFS_PROP_REFQUOTA), ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } if (newval == 0) { dsl_dataset_rele(ds, FTAG); return (0); } if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes || newval < ds->ds_reserved) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENOSPC)); } dsl_dataset_rele(ds, FTAG); return (0); } static void dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds = NULL; uint64_t newval; VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFQUOTA), ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, &ddsqra->ddsqra_value, tx); VERIFY0(dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); if (ds->ds_quota != newval) { dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_quota = newval; } dsl_dataset_rele(ds, FTAG); } int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, uint64_t refquota) { dsl_dataset_set_qr_arg_t ddsqra; ddsqra.ddsqra_name = dsname; ddsqra.ddsqra_source = source; ddsqra.ddsqra_value = refquota; return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } static int dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) { dsl_dataset_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error; uint64_t newval, unique; if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) return (SET_ERROR(ENOTSUP)); error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); if (ds->ds_is_snapshot) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_prop_predict(ds->ds_dir, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } /* * If we are doing the preliminary check in open context, the * space estimates may be inaccurate. */ if (!dmu_tx_is_syncing(tx)) { dsl_dataset_rele(ds, FTAG); return (0); } mutex_enter(&ds->ds_lock); if (!DS_UNIQUE_IS_ACCURATE(ds)) dsl_dataset_recalc_head_uniq(ds); unique = dsl_dataset_phys(ds)->ds_unique_bytes; mutex_exit(&ds->ds_lock); if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { uint64_t delta = MAX(unique, newval) - MAX(unique, ds->ds_reserved); if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || (ds->ds_quota > 0 && newval > ds->ds_quota)) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENOSPC)); } } dsl_dataset_rele(ds, FTAG); return (0); } void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, zprop_source_t source, uint64_t value, dmu_tx_t *tx) { uint64_t newval; uint64_t unique; int64_t delta; dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), source, sizeof (value), 1, &value, tx); VERIFY0(dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); unique = dsl_dataset_phys(ds)->ds_unique_bytes; delta = MAX(0, (int64_t)(newval - unique)) - MAX(0, (int64_t)(ds->ds_reserved - unique)); ds->ds_reserved = newval; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); mutex_exit(&ds->ds_dir->dd_lock); } static void dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds = NULL; VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); dsl_dataset_set_refreservation_sync_impl(ds, ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); dsl_dataset_rele(ds, FTAG); } int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, uint64_t refreservation) { dsl_dataset_set_qr_arg_t ddsqra; ddsqra.ddsqra_name = dsname; ddsqra.ddsqra_source = source; ddsqra.ddsqra_value = refreservation; return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, dsl_dataset_set_refreservation_sync, &ddsqra, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } typedef struct dsl_dataset_set_compression_arg { const char *ddsca_name; zprop_source_t ddsca_source; uint64_t ddsca_value; } dsl_dataset_set_compression_arg_t; static int dsl_dataset_set_compression_check(void *arg, dmu_tx_t *tx) { dsl_dataset_set_compression_arg_t *ddsca = arg; dsl_pool_t *dp = dmu_tx_pool(tx); uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value); spa_feature_t f = zio_compress_to_feature(compval); if (f == SPA_FEATURE_NONE) return (SET_ERROR(EINVAL)); if (!spa_feature_is_enabled(dp->dp_spa, f)) return (SET_ERROR(ENOTSUP)); return (0); } static void dsl_dataset_set_compression_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_set_compression_arg_t *ddsca = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds = NULL; uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value); spa_feature_t f = zio_compress_to_feature(compval); ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN); VERIFY0(dsl_dataset_hold(dp, ddsca->ddsca_name, FTAG, &ds)); if (zfeature_active(f, ds->ds_feature[f]) != B_TRUE) { ds->ds_feature_activation[f] = (void *)B_TRUE; dsl_dataset_activate_feature(ds->ds_object, f, ds->ds_feature_activation[f], tx); ds->ds_feature[f] = ds->ds_feature_activation[f]; } dsl_dataset_rele(ds, FTAG); } int dsl_dataset_set_compression(const char *dsname, zprop_source_t source, uint64_t compression) { dsl_dataset_set_compression_arg_t ddsca; /* * The sync task is only required for zstd in order to activate * the feature flag when the property is first set. */ if (ZIO_COMPRESS_ALGO(compression) != ZIO_COMPRESS_ZSTD) return (0); ddsca.ddsca_name = dsname; ddsca.ddsca_source = source; ddsca.ddsca_value = compression; return (dsl_sync_task(dsname, dsl_dataset_set_compression_check, dsl_dataset_set_compression_sync, &ddsca, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } /* * Return (in *usedp) the amount of space referenced by "new" that was not * referenced at the time the bookmark corresponds to. "New" may be a * snapshot or a head. The bookmark must be before new, in * new's filesystem (or its origin) -- caller verifies this. * * The written space is calculated by considering two components: First, we * ignore any freed space, and calculate the written as new's used space * minus old's used space. Next, we add in the amount of space that was freed * between the two time points, thus reducing new's used space relative to * old's. Specifically, this is the space that was born before * zbm_creation_txg, and freed before new (ie. on new's deadlist or a * previous deadlist). * * space freed [---------------------] * snapshots ---O-------O--------O-------O------ * bookmark new * * Note, the bookmark's zbm_*_bytes_refd must be valid, but if the HAS_FBN * flag is not set, we will calculate the freed_before_next based on the * next snapshot's deadlist, rather than using zbm_*_freed_before_next_snap. */ static int dsl_dataset_space_written_impl(zfs_bookmark_phys_t *bmp, dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { int err = 0; dsl_pool_t *dp = new->ds_dir->dd_pool; ASSERT(dsl_pool_config_held(dp)); if (dsl_dataset_is_snapshot(new)) { ASSERT3U(bmp->zbm_creation_txg, <, dsl_dataset_phys(new)->ds_creation_txg); } *usedp = 0; *usedp += dsl_dataset_phys(new)->ds_referenced_bytes; *usedp -= bmp->zbm_referenced_bytes_refd; *compp = 0; *compp += dsl_dataset_phys(new)->ds_compressed_bytes; *compp -= bmp->zbm_compressed_bytes_refd; *uncompp = 0; *uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes; *uncompp -= bmp->zbm_uncompressed_bytes_refd; dsl_dataset_t *snap = new; while (dsl_dataset_phys(snap)->ds_prev_snap_txg > bmp->zbm_creation_txg) { uint64_t used, comp, uncomp; dsl_deadlist_space_range(&snap->ds_deadlist, 0, bmp->zbm_creation_txg, &used, &comp, &uncomp); *usedp += used; *compp += comp; *uncompp += uncomp; uint64_t snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj; if (snap != new) dsl_dataset_rele(snap, FTAG); err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); if (err != 0) break; } /* * We might not have the FBN if we are calculating written from * a snapshot (because we didn't know the correct "next" snapshot * until now). */ if (bmp->zbm_flags & ZBM_FLAG_HAS_FBN) { *usedp += bmp->zbm_referenced_freed_before_next_snap; *compp += bmp->zbm_compressed_freed_before_next_snap; *uncompp += bmp->zbm_uncompressed_freed_before_next_snap; } else { ASSERT3U(dsl_dataset_phys(snap)->ds_prev_snap_txg, ==, bmp->zbm_creation_txg); uint64_t used, comp, uncomp; dsl_deadlist_space(&snap->ds_deadlist, &used, &comp, &uncomp); *usedp += used; *compp += comp; *uncompp += uncomp; } if (snap != new) dsl_dataset_rele(snap, FTAG); return (err); } /* * Return (in *usedp) the amount of space written in new that was not * present at the time the bookmark corresponds to. New may be a * snapshot or the head. Old must be a bookmark before new, in * new's filesystem (or its origin) -- caller verifies this. */ int dsl_dataset_space_written_bookmark(zfs_bookmark_phys_t *bmp, dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { if (!(bmp->zbm_flags & ZBM_FLAG_HAS_FBN)) return (SET_ERROR(ENOTSUP)); return (dsl_dataset_space_written_impl(bmp, new, usedp, compp, uncompp)); } /* * Return (in *usedp) the amount of space written in new that is not * present in oldsnap. New may be a snapshot or the head. Old must be * a snapshot before new, in new's filesystem (or its origin). If not then * fail and return EINVAL. */ int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { if (!dsl_dataset_is_before(new, oldsnap, 0)) return (SET_ERROR(EINVAL)); zfs_bookmark_phys_t zbm = { 0 }; dsl_dataset_phys_t *dsp = dsl_dataset_phys(oldsnap); zbm.zbm_guid = dsp->ds_guid; zbm.zbm_creation_txg = dsp->ds_creation_txg; zbm.zbm_creation_time = dsp->ds_creation_time; zbm.zbm_referenced_bytes_refd = dsp->ds_referenced_bytes; zbm.zbm_compressed_bytes_refd = dsp->ds_compressed_bytes; zbm.zbm_uncompressed_bytes_refd = dsp->ds_uncompressed_bytes; /* * If oldsnap is the origin (or origin's origin, ...) of new, * we can't easily calculate the effective FBN. Therefore, * we do not set ZBM_FLAG_HAS_FBN, so that the _impl will calculate * it relative to the correct "next": the next snapshot towards "new", * rather than the next snapshot in oldsnap's dsl_dir. */ return (dsl_dataset_space_written_impl(&zbm, new, usedp, compp, uncompp)); } /* * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, * lastsnap, and all snapshots in between are deleted. * * blocks that would be freed [---------------------------] * snapshots ---O-------O--------O-------O--------O * firstsnap lastsnap * * This is the set of blocks that were born after the snap before firstsnap, * (birth > firstsnap->prev_snap_txg) and died before the snap after the * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). * We calculate this by iterating over the relevant deadlists (from the snap * after lastsnap, backward to the snap after firstsnap), summing up the * space on the deadlist that was born after the snap before firstsnap. */ int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *lastsnap, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { int err = 0; uint64_t snapobj; dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; ASSERT(firstsnap->ds_is_snapshot); ASSERT(lastsnap->ds_is_snapshot); /* * Check that the snapshots are in the same dsl_dir, and firstsnap * is before lastsnap. */ if (firstsnap->ds_dir != lastsnap->ds_dir || dsl_dataset_phys(firstsnap)->ds_creation_txg > dsl_dataset_phys(lastsnap)->ds_creation_txg) return (SET_ERROR(EINVAL)); *usedp = *compp = *uncompp = 0; snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj; while (snapobj != firstsnap->ds_object) { dsl_dataset_t *ds; uint64_t used, comp, uncomp; err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); if (err != 0) break; dsl_deadlist_space_range(&ds->ds_deadlist, dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX, &used, &comp, &uncomp); *usedp += used; *compp += comp; *uncompp += uncomp; snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj; ASSERT3U(snapobj, !=, 0); dsl_dataset_rele(ds, FTAG); } return (err); } /* * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. * For example, they could both be snapshots of the same filesystem, and * 'earlier' is before 'later'. Or 'earlier' could be the origin of * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's * filesystem. Or 'earlier' could be the origin's origin. * * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. */ boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, uint64_t earlier_txg) { dsl_pool_t *dp = later->ds_dir->dd_pool; int error; boolean_t ret; ASSERT(dsl_pool_config_held(dp)); ASSERT(earlier->ds_is_snapshot || earlier_txg != 0); if (earlier_txg == 0) earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; if (later->ds_is_snapshot && earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) return (B_FALSE); if (later->ds_dir == earlier->ds_dir) return (B_TRUE); /* * We check dd_origin_obj explicitly here rather than using * dsl_dir_is_clone() so that we will return TRUE if "earlier" * is $ORIGIN@$ORIGIN. dsl_dataset_space_written() depends on * this behavior. */ if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == 0) return (B_FALSE); dsl_dataset_t *origin; error = dsl_dataset_hold_obj(dp, dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin); if (error != 0) return (B_FALSE); if (dsl_dataset_phys(origin)->ds_creation_txg == earlier_txg && origin->ds_dir == earlier->ds_dir) { dsl_dataset_rele(origin, FTAG); return (B_TRUE); } ret = dsl_dataset_is_before(origin, earlier, earlier_txg); dsl_dataset_rele(origin, FTAG); return (ret); } void dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); } boolean_t dsl_dataset_is_zapified(dsl_dataset_t *ds) { dmu_object_info_t doi; dmu_object_info_from_db(ds->ds_dbuf, &doi); return (doi.doi_type == DMU_OTN_ZAP_METADATA); } boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds) { return (dsl_dataset_is_zapified(ds) && zap_contains(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0); } uint64_t dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds) { uint64_t remap_deadlist_obj; int err; if (!dsl_dataset_is_zapified(ds)) return (0); err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object, DS_FIELD_REMAP_DEADLIST, sizeof (remap_deadlist_obj), 1, &remap_deadlist_obj); if (err != 0) { VERIFY3S(err, ==, ENOENT); return (0); } ASSERT(remap_deadlist_obj != 0); return (remap_deadlist_obj); } boolean_t dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds) { EQUIV(dsl_deadlist_is_open(&ds->ds_remap_deadlist), dsl_dataset_get_remap_deadlist_object(ds) != 0); return (dsl_deadlist_is_open(&ds->ds_remap_deadlist)); } static void dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) { ASSERT(obj != 0); dsl_dataset_zapify(ds, tx); VERIFY0(zap_add(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object, DS_FIELD_REMAP_DEADLIST, sizeof (obj), 1, &obj, tx)); } static void dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds, dmu_tx_t *tx) { VERIFY0(zap_remove(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object, DS_FIELD_REMAP_DEADLIST, tx)); } void dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx) { uint64_t remap_deadlist_object; spa_t *spa = ds->ds_dir->dd_pool->dp_spa; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dsl_dataset_remap_deadlist_exists(ds)); remap_deadlist_object = ds->ds_remap_deadlist.dl_object; dsl_deadlist_close(&ds->ds_remap_deadlist); dsl_deadlist_free(spa_meta_objset(spa), remap_deadlist_object, tx); dsl_dataset_unset_remap_deadlist_object(ds, tx); spa_feature_decr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); } void dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx) { uint64_t remap_deadlist_obj; spa_t *spa = ds->ds_dir->dd_pool->dp_spa; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(MUTEX_HELD(&ds->ds_remap_deadlist_lock)); /* * Currently we only create remap deadlists when there are indirect * vdevs with referenced mappings. */ ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL)); remap_deadlist_obj = dsl_deadlist_clone( &ds->ds_deadlist, UINT64_MAX, dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); dsl_dataset_set_remap_deadlist_object(ds, remap_deadlist_obj, tx); dsl_deadlist_open(&ds->ds_remap_deadlist, spa_meta_objset(spa), remap_deadlist_obj); spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); } void dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps, uint64_t num_redact_snaps, dmu_tx_t *tx) { uint64_t dsobj = ds->ds_object; struct feature_type_uint64_array_arg *ftuaa = kmem_zalloc(sizeof (*ftuaa), KM_SLEEP); ftuaa->length = (int64_t)num_redact_snaps; if (num_redact_snaps > 0) { ftuaa->array = kmem_alloc(num_redact_snaps * sizeof (uint64_t), KM_SLEEP); memcpy(ftuaa->array, redact_snaps, num_redact_snaps * sizeof (uint64_t)); } dsl_dataset_activate_feature(dsobj, SPA_FEATURE_REDACTED_DATASETS, ftuaa, tx); ds->ds_feature[SPA_FEATURE_REDACTED_DATASETS] = ftuaa; } /* * Find and return (in *oldest_dsobj) the oldest snapshot of the dsobj * dataset whose birth time is >= min_txg. */ int dsl_dataset_oldest_snapshot(spa_t *spa, uint64_t head_ds, uint64_t min_txg, uint64_t *oldest_dsobj) { dsl_dataset_t *ds; dsl_pool_t *dp = spa->spa_dsl_pool; int error = dsl_dataset_hold_obj(dp, head_ds, FTAG, &ds); if (error != 0) return (error); uint64_t prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; uint64_t prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; while (prev_obj != 0 && min_txg < prev_obj_txg) { dsl_dataset_rele(ds, FTAG); if ((error = dsl_dataset_hold_obj(dp, prev_obj, FTAG, &ds)) != 0) return (error); prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; } *oldest_dsobj = ds->ds_object; dsl_dataset_rele(ds, FTAG); return (0); } ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, ZMOD_RW, "Max allowed record size"); ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW, "Allow mounting of redacted datasets"); +ZFS_MODULE_PARAM(zfs, zfs_, snapshot_history_enabled, INT, ZMOD_RW, + "Include snapshot events in pool history/events"); + EXPORT_SYMBOL(dsl_dataset_hold); EXPORT_SYMBOL(dsl_dataset_hold_flags); EXPORT_SYMBOL(dsl_dataset_hold_obj); EXPORT_SYMBOL(dsl_dataset_hold_obj_flags); EXPORT_SYMBOL(dsl_dataset_own); EXPORT_SYMBOL(dsl_dataset_own_obj); EXPORT_SYMBOL(dsl_dataset_name); EXPORT_SYMBOL(dsl_dataset_rele); EXPORT_SYMBOL(dsl_dataset_rele_flags); EXPORT_SYMBOL(dsl_dataset_disown); EXPORT_SYMBOL(dsl_dataset_tryown); EXPORT_SYMBOL(dsl_dataset_create_sync); EXPORT_SYMBOL(dsl_dataset_create_sync_dd); EXPORT_SYMBOL(dsl_dataset_snapshot_check); EXPORT_SYMBOL(dsl_dataset_snapshot_sync); EXPORT_SYMBOL(dsl_dataset_promote); EXPORT_SYMBOL(dsl_dataset_user_hold); EXPORT_SYMBOL(dsl_dataset_user_release); EXPORT_SYMBOL(dsl_dataset_get_holds); EXPORT_SYMBOL(dsl_dataset_get_blkptr); EXPORT_SYMBOL(dsl_dataset_get_spa); EXPORT_SYMBOL(dsl_dataset_modified_since_snap); EXPORT_SYMBOL(dsl_dataset_space_written); EXPORT_SYMBOL(dsl_dataset_space_wouldfree); EXPORT_SYMBOL(dsl_dataset_sync); EXPORT_SYMBOL(dsl_dataset_block_born); EXPORT_SYMBOL(dsl_dataset_block_kill); EXPORT_SYMBOL(dsl_dataset_dirty); EXPORT_SYMBOL(dsl_dataset_stats); EXPORT_SYMBOL(dsl_dataset_fast_stat); EXPORT_SYMBOL(dsl_dataset_space); EXPORT_SYMBOL(dsl_dataset_fsid_guid); EXPORT_SYMBOL(dsl_dsobj_to_dsname); EXPORT_SYMBOL(dsl_dataset_check_quota); EXPORT_SYMBOL(dsl_dataset_clone_swap_check_impl); EXPORT_SYMBOL(dsl_dataset_clone_swap_sync_impl); EXPORT_SYMBOL(dsl_dataset_feature_set_activation); diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c index 778b6b99b85e..100a48cb4f71 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c +++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c @@ -1,1284 +1,1291 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2013 by Joyent, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +extern int zfs_snapshot_history_enabled; + int dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) { if (!ds->ds_is_snapshot) return (SET_ERROR(EINVAL)); if (dsl_dataset_long_held(ds)) return (SET_ERROR(EBUSY)); /* * Only allow deferred destroy on pools that support it. * NOTE: deferred destroy is only supported on snapshots. */ if (defer) { if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) return (SET_ERROR(ENOTSUP)); return (0); } /* * If this snapshot has an elevated user reference count, * we can't destroy it yet. */ if (ds->ds_userrefs > 0) return (SET_ERROR(EBUSY)); /* * Can't delete a branch point. */ if (dsl_dataset_phys(ds)->ds_num_children > 1) return (SET_ERROR(EEXIST)); return (0); } int dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) { dsl_destroy_snapshot_arg_t *ddsa = arg; const char *dsname = ddsa->ddsa_name; boolean_t defer = ddsa->ddsa_defer; dsl_pool_t *dp = dmu_tx_pool(tx); int error = 0; dsl_dataset_t *ds; error = dsl_dataset_hold(dp, dsname, FTAG, &ds); /* * If the snapshot does not exist, silently ignore it, and * dsl_destroy_snapshot_sync() will be a no-op * (it's "already destroyed"). */ if (error == ENOENT) return (0); if (error == 0) { error = dsl_destroy_snapshot_check_impl(ds, defer); dsl_dataset_rele(ds, FTAG); } return (error); } struct process_old_arg { dsl_dataset_t *ds; dsl_dataset_t *ds_prev; boolean_t after_branch_point; zio_t *pio; uint64_t used, comp, uncomp; }; static int process_old_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) { struct process_old_arg *poa = arg; dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; ASSERT(!BP_IS_HOLE(bp)); if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) { dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx); if (poa->ds_prev && !poa->after_branch_point && bp->blk_birth > dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) { dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes += bp_get_dsize_sync(dp->dp_spa, bp); } } else { poa->used += bp_get_dsize_sync(dp->dp_spa, bp); poa->comp += BP_GET_PSIZE(bp); poa->uncomp += BP_GET_UCSIZE(bp); dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); } return (0); } static void process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) { struct process_old_arg poa = { 0 }; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t deadlist_obj; ASSERT(ds->ds_deadlist.dl_oldfmt); ASSERT(ds_next->ds_deadlist.dl_oldfmt); poa.ds = ds; poa.ds_prev = ds_prev; poa.after_branch_point = after_branch_point; poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, process_old_cb, &poa, tx)); VERIFY0(zio_wait(poa.pio)); ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes); /* change snapused */ dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, -poa.used, -poa.comp, -poa.uncomp, tx); /* swap next's deadlist to our deadlist */ dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_close(&ds_next->ds_deadlist); deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; dsl_dataset_phys(ds)->ds_deadlist_obj = dsl_dataset_phys(ds_next)->ds_deadlist_obj; dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj; dsl_deadlist_open(&ds->ds_deadlist, mos, dsl_dataset_phys(ds)->ds_deadlist_obj); dsl_deadlist_open(&ds_next->ds_deadlist, mos, dsl_dataset_phys(ds_next)->ds_deadlist_obj); } typedef struct remaining_clones_key { dsl_dataset_t *rck_clone; list_node_t rck_node; } remaining_clones_key_t; static remaining_clones_key_t * rck_alloc(dsl_dataset_t *clone) { remaining_clones_key_t *rck = kmem_alloc(sizeof (*rck), KM_SLEEP); rck->rck_clone = clone; return (rck); } static void dsl_dir_remove_clones_key_impl(dsl_dir_t *dd, uint64_t mintxg, dmu_tx_t *tx, list_t *stack, const void *tag) { objset_t *mos = dd->dd_pool->dp_meta_objset; /* * If it is the old version, dd_clones doesn't exist so we can't * find the clones, but dsl_deadlist_remove_key() is a no-op so it * doesn't matter. */ if (dsl_dir_phys(dd)->dd_clones == 0) return; zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { dsl_dataset_t *clone; VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, za->za_first_integer, tag, &clone)); if (clone->ds_dir->dd_origin_txg > mintxg) { dsl_deadlist_remove_key(&clone->ds_deadlist, mintxg, tx); if (dsl_dataset_remap_deadlist_exists(clone)) { dsl_deadlist_remove_key( &clone->ds_remap_deadlist, mintxg, tx); } list_insert_head(stack, rck_alloc(clone)); } else { dsl_dataset_rele(clone, tag); } } zap_cursor_fini(zc); kmem_free(za, sizeof (zap_attribute_t)); kmem_free(zc, sizeof (zap_cursor_t)); } void dsl_dir_remove_clones_key(dsl_dir_t *top_dd, uint64_t mintxg, dmu_tx_t *tx) { list_t stack; list_create(&stack, sizeof (remaining_clones_key_t), offsetof(remaining_clones_key_t, rck_node)); dsl_dir_remove_clones_key_impl(top_dd, mintxg, tx, &stack, FTAG); for (remaining_clones_key_t *rck = list_remove_head(&stack); rck != NULL; rck = list_remove_head(&stack)) { dsl_dataset_t *clone = rck->rck_clone; dsl_dir_t *clone_dir = clone->ds_dir; kmem_free(rck, sizeof (*rck)); dsl_dir_remove_clones_key_impl(clone_dir, mintxg, tx, &stack, FTAG); dsl_dataset_rele(clone, FTAG); } list_destroy(&stack); } static void dsl_destroy_snapshot_handle_remaps(dsl_dataset_t *ds, dsl_dataset_t *ds_next, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; /* Move blocks to be obsoleted to pool's obsolete list. */ if (dsl_dataset_remap_deadlist_exists(ds_next)) { if (!bpobj_is_open(&dp->dp_obsolete_bpobj)) dsl_pool_create_obsolete_bpobj(dp, tx); dsl_deadlist_move_bpobj(&ds_next->ds_remap_deadlist, &dp->dp_obsolete_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); } /* Merge our deadlist into next's and free it. */ if (dsl_dataset_remap_deadlist_exists(ds)) { uint64_t remap_deadlist_object = dsl_dataset_get_remap_deadlist_object(ds); ASSERT(remap_deadlist_object != 0); mutex_enter(&ds_next->ds_remap_deadlist_lock); if (!dsl_dataset_remap_deadlist_exists(ds_next)) dsl_dataset_create_remap_deadlist(ds_next, tx); mutex_exit(&ds_next->ds_remap_deadlist_lock); dsl_deadlist_merge(&ds_next->ds_remap_deadlist, remap_deadlist_object, tx); dsl_dataset_destroy_remap_deadlist(ds, tx); } } void dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) { int after_branch_point = FALSE; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; dsl_dataset_t *ds_prev = NULL; uint64_t obj; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(zfs_refcount_is_zero(&ds->ds_longholds)); if (defer && (ds->ds_userrefs > 0 || dsl_dataset_phys(ds)->ds_num_children > 1)) { ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY; - spa_history_log_internal_ds(ds, "defer_destroy", tx, " "); + if (zfs_snapshot_history_enabled) { + spa_history_log_internal_ds(ds, "defer_destroy", tx, + " "); + } return; } ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); - /* We need to log before removing it from the namespace. */ - spa_history_log_internal_ds(ds, "destroy", tx, " "); + if (zfs_snapshot_history_enabled) { + /* We need to log before removing it from the namespace. */ + spa_history_log_internal_ds(ds, "destroy", tx, " "); + } dsl_scan_ds_destroyed(ds, tx); obj = ds->ds_object; boolean_t book_exists = dsl_bookmark_ds_destroyed(ds, tx); for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (dsl_dataset_feature_is_active(ds, f)) dsl_dataset_deactivate_feature(ds, f, tx); } if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { ASSERT3P(ds->ds_prev, ==, NULL); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev)); after_branch_point = (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj); dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); if (after_branch_point && dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) { dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) { VERIFY0(zap_add_int(mos, dsl_dataset_phys(ds_prev)-> ds_next_clones_obj, dsl_dataset_phys(ds)->ds_next_snap_obj, tx)); } } if (!after_branch_point) { dsl_dataset_phys(ds_prev)->ds_next_snap_obj = dsl_dataset_phys(ds)->ds_next_snap_obj; } } dsl_dataset_t *ds_next; uint64_t old_unique; uint64_t used = 0, comp = 0, uncomp = 0; VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next)); ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj); old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes; dmu_buf_will_dirty(ds_next->ds_dbuf, tx); dsl_dataset_phys(ds_next)->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; dsl_dataset_phys(ds_next)->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0); if (ds_next->ds_deadlist.dl_oldfmt) { process_old_deadlist(ds, ds_prev, ds_next, after_branch_point, tx); } else { /* Adjust prev's unique space. */ if (ds_prev && !after_branch_point) { dsl_deadlist_space_range(&ds_next->ds_deadlist, dsl_dataset_phys(ds_prev)->ds_prev_snap_txg, dsl_dataset_phys(ds)->ds_prev_snap_txg, &used, &comp, &uncomp); dsl_dataset_phys(ds_prev)->ds_unique_bytes += used; } /* Adjust snapused. */ dsl_deadlist_space_range(&ds_next->ds_deadlist, dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX, &used, &comp, &uncomp); dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, -used, -comp, -uncomp, tx); /* Move blocks to be freed to pool's free list. */ dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, used, comp, uncomp, tx); /* Merge our deadlist into next's and free it. */ dsl_deadlist_merge(&ds_next->ds_deadlist, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); /* * We are done with the deadlist tree (generated/used * by dsl_deadlist_move_bpobj() and dsl_deadlist_merge()). * Discard it to save memory. */ dsl_deadlist_discard_tree(&ds_next->ds_deadlist); } dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_deadlist_obj = 0; dsl_destroy_snapshot_handle_remaps(ds, ds_next, tx); if (!book_exists) { /* Collapse range in clone heads */ dsl_dir_remove_clones_key(ds->ds_dir, dsl_dataset_phys(ds)->ds_creation_txg, tx); } if (ds_next->ds_is_snapshot) { dsl_dataset_t *ds_nextnext; /* * Update next's unique to include blocks which * were previously shared by only this snapshot * and it. Those blocks will be born after the * prev snap and before this snap, and will have * died after the next snap and before the one * after that (ie. be on the snap after next's * deadlist). */ VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds_next)->ds_next_snap_obj, FTAG, &ds_nextnext)); dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, dsl_dataset_phys(ds)->ds_prev_snap_txg, dsl_dataset_phys(ds)->ds_creation_txg, &used, &comp, &uncomp); dsl_dataset_phys(ds_next)->ds_unique_bytes += used; dsl_dataset_rele(ds_nextnext, FTAG); ASSERT3P(ds_next->ds_prev, ==, NULL); /* Collapse range in this head. */ dsl_dataset_t *hds; VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds)); if (!book_exists) { /* Collapse range in this head. */ dsl_deadlist_remove_key(&hds->ds_deadlist, dsl_dataset_phys(ds)->ds_creation_txg, tx); } if (dsl_dataset_remap_deadlist_exists(hds)) { dsl_deadlist_remove_key(&hds->ds_remap_deadlist, dsl_dataset_phys(ds)->ds_creation_txg, tx); } dsl_dataset_rele(hds, FTAG); } else { ASSERT3P(ds_next->ds_prev, ==, ds); dsl_dataset_rele(ds_next->ds_prev, ds_next); ds_next->ds_prev = NULL; if (ds_prev) { VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds_next, &ds_next->ds_prev)); } dsl_dataset_recalc_head_uniq(ds_next); /* * Reduce the amount of our unconsumed refreservation * being charged to our parent by the amount of * new unique data we have gained. */ if (old_unique < ds_next->ds_reserved) { int64_t mrsdelta; uint64_t new_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes; ASSERT(old_unique <= new_unique); mrsdelta = MIN(new_unique - old_unique, ds_next->ds_reserved - old_unique); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); } } dsl_dataset_rele(ds_next, FTAG); /* * This must be done after the dsl_traverse(), because it will * re-open the objset. */ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } /* remove from snapshot namespace */ dsl_dataset_t *ds_head; ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head)); VERIFY0(dsl_dataset_get_snapname(ds)); #ifdef ZFS_DEBUG { uint64_t val; int err; err = dsl_dataset_snap_lookup(ds_head, ds->ds_snapname, &val); ASSERT0(err); ASSERT3U(val, ==, obj); } #endif VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE)); dsl_dataset_rele(ds_head, FTAG); if (ds_prev != NULL) dsl_dataset_rele(ds_prev, FTAG); spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { uint64_t count __maybe_unused; ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, &count) && count == 0); VERIFY0(dmu_object_free(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, tx)); } if (dsl_dataset_phys(ds)->ds_props_obj != 0) VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj, tx)); if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0) VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj, tx)); dsl_dir_rele(ds->ds_dir, ds); ds->ds_dir = NULL; dmu_object_free_zapified(mos, obj, tx); } void dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx) { dsl_destroy_snapshot_arg_t *ddsa = arg; const char *dsname = ddsa->ddsa_name; boolean_t defer = ddsa->ddsa_defer; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error = dsl_dataset_hold(dp, dsname, FTAG, &ds); if (error == ENOENT) return; ASSERT0(error); dsl_destroy_snapshot_sync_impl(ds, defer, tx); zvol_remove_minors(dp->dp_spa, dsname, B_TRUE); dsl_dataset_rele(ds, FTAG); } /* * The semantics of this function are described in the comment above * lzc_destroy_snaps(). To summarize: * * The snapshots must all be in the same pool. * * Snapshots that don't exist will be silently ignored (considered to be * "already deleted"). * * On success, all snaps will be destroyed and this will return 0. * On failure, no snaps will be destroyed, the errlist will be filled in, * and this will return an errno. */ int dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, nvlist_t *errlist) { if (nvlist_next_nvpair(snaps, NULL) == NULL) return (0); /* * lzc_destroy_snaps() is documented to take an nvlist whose * values "don't matter". We need to convert that nvlist to * one that we know can be converted to LUA. */ nvlist_t *snaps_normalized = fnvlist_alloc(); for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { fnvlist_add_boolean_value(snaps_normalized, nvpair_name(pair), B_TRUE); } nvlist_t *arg = fnvlist_alloc(); fnvlist_add_nvlist(arg, "snaps", snaps_normalized); fnvlist_free(snaps_normalized); fnvlist_add_boolean_value(arg, "defer", defer); nvlist_t *wrapper = fnvlist_alloc(); fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg); fnvlist_free(arg); const char *program = "arg = ...\n" "snaps = arg['snaps']\n" "defer = arg['defer']\n" "errors = { }\n" "has_errors = false\n" "for snap, v in pairs(snaps) do\n" " errno = zfs.check.destroy{snap, defer=defer}\n" " zfs.debug('snap: ' .. snap .. ' errno: ' .. errno)\n" " if errno == ENOENT then\n" " snaps[snap] = nil\n" " elseif errno ~= 0 then\n" " errors[snap] = errno\n" " has_errors = true\n" " end\n" "end\n" "if has_errors then\n" " return errors\n" "end\n" "for snap, v in pairs(snaps) do\n" " errno = zfs.sync.destroy{snap, defer=defer}\n" " assert(errno == 0)\n" "end\n" "return { }\n"; nvlist_t *result = fnvlist_alloc(); int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)), program, B_TRUE, 0, zfs_lua_max_memlimit, fnvlist_lookup_nvpair(wrapper, ZCP_ARG_ARGLIST), result); if (error != 0) { char *errorstr = NULL; (void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr); if (errorstr != NULL) { zfs_dbgmsg("%s", errorstr); } fnvlist_free(wrapper); fnvlist_free(result); return (error); } fnvlist_free(wrapper); /* * lzc_destroy_snaps() is documented to fill the errlist with * int32 values, so we need to convert the int64 values that are * returned from LUA. */ int rv = 0; nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN); for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL); pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) { int32_t val = (int32_t)fnvpair_value_int64(pair); if (rv == 0) rv = val; fnvlist_add_int32(errlist, nvpair_name(pair), val); } fnvlist_free(result); return (rv); } int dsl_destroy_snapshot(const char *name, boolean_t defer) { int error; nvlist_t *nvl = fnvlist_alloc(); nvlist_t *errlist = fnvlist_alloc(); fnvlist_add_boolean(nvl, name); error = dsl_destroy_snapshots_nvl(nvl, defer, errlist); fnvlist_free(errlist); fnvlist_free(nvl); return (error); } struct killarg { dsl_dataset_t *ds; dmu_tx_t *tx; }; static int kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { (void) spa, (void) dnp; struct killarg *ka = arg; dmu_tx_t *tx = ka->tx; if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); if (zb->zb_level == ZB_ZIL_LEVEL) { ASSERT(zilog != NULL); /* * It's a block in the intent log. It has no * accounting, so just free it. */ dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); } else { ASSERT(zilog == NULL); ASSERT3U(bp->blk_birth, >, dsl_dataset_phys(ka->ds)->ds_prev_snap_txg); (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); } return (0); } static void old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) { struct killarg ka; spa_history_log_internal_ds(ds, "destroy", tx, "(synchronous, mintxg=%llu)", (long long)dsl_dataset_phys(ds)->ds_prev_snap_txg); /* * Free everything that we point to (that's born after * the previous snapshot, if we are a clone) * * NB: this should be very quick, because we already * freed all the objects in open context. */ ka.ds = ds; ka.tx = tx; VERIFY0(traverse_dataset(ds, dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST | TRAVERSE_NO_DECRYPT, kill_blkptr, &ka)); ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || dsl_dataset_phys(ds)->ds_unique_bytes == 0); } int dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) { int error; uint64_t count; objset_t *mos; ASSERT(!ds->ds_is_snapshot); if (ds->ds_is_snapshot) return (SET_ERROR(EINVAL)); if (zfs_refcount_count(&ds->ds_longholds) != expected_holds) return (SET_ERROR(EBUSY)); ASSERT0(ds->ds_dir->dd_activity_waiters); mos = ds->ds_dir->dd_pool->dp_meta_objset; /* * Can't delete a head dataset if there are snapshots of it. * (Except if the only snapshots are from the branch we cloned * from.) */ if (ds->ds_prev != NULL && dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object) return (SET_ERROR(EBUSY)); /* * Can't delete if there are children of this fs. */ error = zap_count(mos, dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count); if (error != 0) return (error); if (count != 0) return (SET_ERROR(EEXIST)); if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && ds->ds_prev->ds_userrefs == 0) { /* We need to remove the origin snapshot as well. */ if (!zfs_refcount_is_zero(&ds->ds_prev->ds_longholds)) return (SET_ERROR(EBUSY)); } return (0); } int dsl_destroy_head_check(void *arg, dmu_tx_t *tx) { dsl_destroy_head_arg_t *ddha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error; error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds); if (error != 0) return (error); error = dsl_destroy_head_check_impl(ds, 0); dsl_dataset_rele(ds, FTAG); return (error); } static void dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) { dsl_dir_t *dd; dsl_pool_t *dp = dmu_tx_pool(tx); objset_t *mos = dp->dp_meta_objset; dd_used_t t; ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock)); VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj); /* Decrement the filesystem count for all parent filesystems. */ if (dd->dd_parent != NULL) dsl_fs_ss_count_adjust(dd->dd_parent, -1, DD_FIELD_FILESYSTEM_COUNT, tx); /* * Remove our reservation. The impl() routine avoids setting the * actual property, which would require the (already destroyed) ds. */ dsl_dir_set_reservation_sync_impl(dd, 0, tx); ASSERT0(dsl_dir_phys(dd)->dd_used_bytes); ASSERT0(dsl_dir_phys(dd)->dd_reserved); for (t = 0; t < DD_USED_NUM; t++) ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]); if (dd->dd_crypto_obj != 0) { dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx); (void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); } VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx)); VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx)); if (dsl_dir_phys(dd)->dd_clones != 0) VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_clones, tx)); VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx)); VERIFY0(zap_remove(mos, dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj, dd->dd_myname, tx)); dsl_dir_rele(dd, FTAG); dmu_object_free_zapified(mos, ddobj, tx); } static void dsl_clone_destroy_assert(dsl_dir_t *dd) { uint64_t used, comp, uncomp; ASSERT(dsl_dir_is_clone(dd)); dsl_deadlist_space(&dd->dd_livelist, &used, &comp, &uncomp); ASSERT3U(dsl_dir_phys(dd)->dd_used_bytes, ==, used); ASSERT3U(dsl_dir_phys(dd)->dd_compressed_bytes, ==, comp); /* * Greater than because we do not track embedded block pointers in * the livelist */ ASSERT3U(dsl_dir_phys(dd)->dd_uncompressed_bytes, >=, uncomp); ASSERT(list_is_empty(&dd->dd_pending_allocs.bpl_list)); ASSERT(list_is_empty(&dd->dd_pending_frees.bpl_list)); } /* * Start the delete process for a clone. Free its zil, verify the space usage * and queue the blkptrs for deletion by adding the livelist to the pool-wide * delete queue. */ static void dsl_async_clone_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) { uint64_t zap_obj, to_delete, used, comp, uncomp; objset_t *os; dsl_dir_t *dd = ds->ds_dir; dsl_pool_t *dp = dmu_tx_pool(tx); objset_t *mos = dp->dp_meta_objset; spa_t *spa = dmu_tx_pool(tx)->dp_spa; VERIFY0(dmu_objset_from_ds(ds, &os)); uint64_t mintxg = 0; dsl_deadlist_entry_t *dle = dsl_deadlist_first(&dd->dd_livelist); if (dle != NULL) mintxg = dle->dle_mintxg; spa_history_log_internal_ds(ds, "destroy", tx, "(livelist, mintxg=%llu)", (long long)mintxg); /* Check that the clone is in a correct state to be deleted */ dsl_clone_destroy_assert(dd); /* Destroy the zil */ zil_destroy_sync(dmu_objset_zil(os), tx); VERIFY0(zap_lookup(mos, dd->dd_object, DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &to_delete)); /* Initialize deleted_clones entry to track livelists to cleanup */ int error = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj); if (error == ENOENT) { zap_obj = zap_create(mos, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &(zap_obj), tx)); spa->spa_livelists_to_delete = zap_obj; } else if (error != 0) { zfs_panic_recover("zfs: error %d was returned while looking " "up DMU_POOL_DELETED_CLONES in the zap", error); return; } VERIFY0(zap_add_int(mos, zap_obj, to_delete, tx)); /* Clone is no longer using space, now tracked by dp_free_dir */ dsl_deadlist_space(&dd->dd_livelist, &used, &comp, &uncomp); dsl_dir_diduse_space(dd, DD_USED_HEAD, -used, -comp, -dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, used, comp, uncomp, tx); dsl_dir_remove_livelist(dd, tx, B_FALSE); zthr_wakeup(spa->spa_livelist_delete_zthr); } /* * Move the bptree into the pool's list of trees to clean up, update space * accounting information and destroy the zil. */ static void dsl_async_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) { uint64_t used, comp, uncomp; objset_t *os; VERIFY0(dmu_objset_from_ds(ds, &os)); dsl_pool_t *dp = dmu_tx_pool(tx); objset_t *mos = dp->dp_meta_objset; spa_history_log_internal_ds(ds, "destroy", tx, "(bptree, mintxg=%llu)", (long long)dsl_dataset_phys(ds)->ds_prev_snap_txg); zil_destroy_sync(dmu_objset_zil(os), tx); if (!spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { dsl_scan_t *scn = dp->dp_scan; spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY, tx); dp->dp_bptree_obj = bptree_alloc(mos, tx); VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj, tx)); ASSERT(!scn->scn_async_destroying); scn->scn_async_destroying = B_TRUE; } used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes; comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes; uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes; ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || dsl_dataset_phys(ds)->ds_unique_bytes == used); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); bptree_add(mos, dp->dp_bptree_obj, &dsl_dataset_phys(ds)->ds_bp, dsl_dataset_phys(ds)->ds_prev_snap_txg, used, comp, uncomp, tx); rrw_exit(&ds->ds_bp_rwlock, FTAG); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, -used, -comp, -uncomp, tx); dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, used, comp, uncomp, tx); } void dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_pool_t *dp = dmu_tx_pool(tx); objset_t *mos = dp->dp_meta_objset; uint64_t obj, ddobj, prevobj = 0; boolean_t rmorigin; ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); ASSERT(ds->ds_prev == NULL || dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); dsl_dir_cancel_waiters(ds->ds_dir); rmorigin = (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && ds->ds_prev->ds_userrefs == 0); /* Remove our reservation. */ if (ds->ds_reserved != 0) { dsl_dataset_set_refreservation_sync_impl(ds, (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), 0, tx); ASSERT0(ds->ds_reserved); } obj = ds->ds_object; for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (dsl_dataset_feature_is_active(ds, f)) dsl_dataset_deactivate_feature(ds, f, tx); } dsl_scan_ds_destroyed(ds, tx); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { /* This is a clone */ ASSERT(ds->ds_prev != NULL); ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=, obj); ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj); dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) { dsl_dataset_remove_from_next_clones(ds->ds_prev, obj, tx); } ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1); dsl_dataset_phys(ds->ds_prev)->ds_num_children--; } /* * Destroy the deadlist. Unless it's a clone, the * deadlist should be empty since the dataset has no snapshots. * (If it's a clone, it's safe to ignore the deadlist contents * since they are still referenced by the origin snapshot.) */ dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_deadlist_obj = 0; if (dsl_dataset_remap_deadlist_exists(ds)) dsl_dataset_destroy_remap_deadlist(ds, tx); /* * Each destroy is responsible for both destroying (enqueuing * to be destroyed) the blkptrs comprising the dataset as well as * those belonging to the zil. */ if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist)) { dsl_async_clone_destroy(ds, tx); } else if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { dsl_async_dataset_destroy(ds, tx); } else { old_synchronous_dataset_destroy(ds, tx); } if (ds->ds_prev != NULL) { if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { VERIFY0(zap_remove_int(mos, dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones, ds->ds_object, tx)); } prevobj = ds->ds_prev->ds_object; dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = NULL; } /* * This must be done after the dsl_traverse(), because it will * re-open the objset. */ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } /* Erase the link in the dir */ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0; ddobj = ds->ds_dir->dd_object; ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0); VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx)); if (ds->ds_bookmarks_obj != 0) { void *cookie = NULL; dsl_bookmark_node_t *dbn; while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) != NULL) { if (dbn->dbn_phys.zbm_redaction_obj != 0) { VERIFY0(dmu_object_free(mos, dbn->dbn_phys.zbm_redaction_obj, tx)); spa_feature_decr(dmu_objset_spa(mos), SPA_FEATURE_REDACTION_BOOKMARKS, tx); } if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) { spa_feature_decr(dmu_objset_spa(mos), SPA_FEATURE_BOOKMARK_WRITTEN, tx); } spa_strfree(dbn->dbn_name); mutex_destroy(&dbn->dbn_lock); kmem_free(dbn, sizeof (*dbn)); } avl_destroy(&ds->ds_bookmarks); VERIFY0(zap_destroy(mos, ds->ds_bookmarks_obj, tx)); spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx); } spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj); ASSERT0(dsl_dataset_phys(ds)->ds_props_obj); ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj); dsl_dir_rele(ds->ds_dir, ds); ds->ds_dir = NULL; dmu_object_free_zapified(mos, obj, tx); dsl_dir_destroy_sync(ddobj, tx); if (rmorigin) { dsl_dataset_t *prev; VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev)); dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx); dsl_dataset_rele(prev, FTAG); } /* Delete errlog. */ if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG)) spa_delete_dataset_errlog(dp->dp_spa, ds->ds_object, tx); } void dsl_destroy_head_sync(void *arg, dmu_tx_t *tx) { dsl_destroy_head_arg_t *ddha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); dsl_destroy_head_sync_impl(ds, tx); zvol_remove_minors(dp->dp_spa, ddha->ddha_name, B_TRUE); dsl_dataset_rele(ds, FTAG); } static void dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx) { dsl_destroy_head_arg_t *ddha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; spa_history_log_internal_ds(ds, "destroy begin", tx, " "); dsl_dataset_rele(ds, FTAG); } int dsl_destroy_head(const char *name) { dsl_destroy_head_arg_t ddha; int error; spa_t *spa; boolean_t isenabled; #ifdef _KERNEL zfs_destroy_unmount_origin(name); #endif error = spa_open(name, &spa, FTAG); if (error != 0) return (error); isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY); spa_close(spa, FTAG); ddha.ddha_name = name; if (!isenabled) { objset_t *os; error = dsl_sync_task(name, dsl_destroy_head_check, dsl_destroy_head_begin_sync, &ddha, 0, ZFS_SPACE_CHECK_DESTROY); if (error != 0) return (error); /* * Head deletion is processed in one txg on old pools; * remove the objects from open context so that the txg sync * is not too long. This optimization can only work for * encrypted datasets if the wrapping key is loaded. */ error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_TRUE, FTAG, &os); if (error == 0) { uint64_t prev_snap_txg = dsl_dataset_phys(dmu_objset_ds(os))-> ds_prev_snap_txg; for (uint64_t obj = 0; error == 0; error = dmu_object_next(os, &obj, FALSE, prev_snap_txg)) (void) dmu_free_long_object(os, obj); /* sync out all frees */ txg_wait_synced(dmu_objset_pool(os), 0); dmu_objset_disown(os, B_TRUE, FTAG); } } return (dsl_sync_task(name, dsl_destroy_head_check, dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_DESTROY)); } /* * Note, this function is used as the callback for dmu_objset_find(). We * always return 0 so that we will continue to find and process * inconsistent datasets, even if we encounter an error trying to * process one of them. */ int dsl_destroy_inconsistent(const char *dsname, void *arg) { (void) arg; objset_t *os; if (dmu_objset_hold(dsname, FTAG, &os) == 0) { boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os)); /* * If the dataset is inconsistent because a resumable receive * has failed, then do not destroy it. */ if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os))) need_destroy = B_FALSE; dmu_objset_rele(os, FTAG); if (need_destroy) (void) dsl_destroy_head(dsname); } return (0); } #if defined(_KERNEL) EXPORT_SYMBOL(dsl_destroy_head); EXPORT_SYMBOL(dsl_destroy_head_sync_impl); EXPORT_SYMBOL(dsl_dataset_user_hold_check_one); EXPORT_SYMBOL(dsl_destroy_snapshot_sync_impl); EXPORT_SYMBOL(dsl_destroy_inconsistent); EXPORT_SYMBOL(dsl_dataset_user_release_tmp); EXPORT_SYMBOL(dsl_destroy_head_check_impl); #endif diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c index 5a64e399cf94..7460269384b4 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dir.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c @@ -1,2469 +1,2478 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2014 Joyent, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2018, loli10K . All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" /* * Filesystem and Snapshot Limits * ------------------------------ * * These limits are used to restrict the number of filesystems and/or snapshots * that can be created at a given level in the tree or below. A typical * use-case is with a delegated dataset where the administrator wants to ensure * that a user within the zone is not creating too many additional filesystems * or snapshots, even though they're not exceeding their space quota. * * The filesystem and snapshot counts are stored as extensible properties. This * capability is controlled by a feature flag and must be enabled to be used. * Once enabled, the feature is not active until the first limit is set. At * that point, future operations to create/destroy filesystems or snapshots * will validate and update the counts. * * Because the count properties will not exist before the feature is active, * the counts are updated when a limit is first set on an uninitialized * dsl_dir node in the tree (The filesystem/snapshot count on a node includes * all of the nested filesystems/snapshots. Thus, a new leaf node has a * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and * snapshot count properties on a node indicate uninitialized counts on that * node.) When first setting a limit on an uninitialized node, the code starts * at the filesystem with the new limit and descends into all sub-filesystems * to add the count properties. * * In practice this is lightweight since a limit is typically set when the * filesystem is created and thus has no children. Once valid, changing the * limit value won't require a re-traversal since the counts are already valid. * When recursively fixing the counts, if a node with a limit is encountered * during the descent, the counts are known to be valid and there is no need to * descend into that filesystem's children. The counts on filesystems above the * one with the new limit will still be uninitialized, unless a limit is * eventually set on one of those filesystems. The counts are always recursively * updated when a limit is set on a dataset, unless there is already a limit. * When a new limit value is set on a filesystem with an existing limit, it is * possible for the new limit to be less than the current count at that level * since a user who can change the limit is also allowed to exceed the limit. * * Once the feature is active, then whenever a filesystem or snapshot is * created, the code recurses up the tree, validating the new count against the * limit at each initialized level. In practice, most levels will not have a * limit set. If there is a limit at any initialized level up the tree, the * check must pass or the creation will fail. Likewise, when a filesystem or * snapshot is destroyed, the counts are recursively adjusted all the way up * the initialized nodes in the tree. Renaming a filesystem into different point * in the tree will first validate, then update the counts on each branch up to * the common ancestor. A receive will also validate the counts and then update * them. * * An exception to the above behavior is that the limit is not enforced if the * user has permission to modify the limit. This is primarily so that * recursive snapshots in the global zone always work. We want to prevent a * denial-of-service in which a lower level delegated dataset could max out its * limit and thus block recursive snapshots from being taken in the global zone. * Because of this, it is possible for the snapshot count to be over the limit * and snapshots taken in the global zone could cause a lower level dataset to * hit or exceed its limit. The administrator taking the global zone recursive * snapshot should be aware of this side-effect and behave accordingly. * For consistency, the filesystem limit is also not enforced if the user can * modify the limit. * * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check() * and updated by dsl_fs_ss_count_adjust(). A new limit value is setup in * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by * dsl_dir_init_fs_ss_count(). */ static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); typedef struct ddulrt_arg { dsl_dir_t *ddulrta_dd; uint64_t ddlrta_txg; } ddulrt_arg_t; static void dsl_dir_evict_async(void *dbu) { dsl_dir_t *dd = dbu; int t; dsl_pool_t *dp __maybe_unused = dd->dd_pool; dd->dd_dbuf = NULL; for (t = 0; t < TXG_SIZE; t++) { ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); ASSERT(dd->dd_tempreserved[t] == 0); ASSERT(dd->dd_space_towrite[t] == 0); } if (dd->dd_parent) dsl_dir_async_rele(dd->dd_parent, dd); spa_async_close(dd->dd_pool->dp_spa, dd); if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); cv_destroy(&dd->dd_activity_cv); mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); } int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, const char *tail, const void *tag, dsl_dir_t **ddp) { dmu_buf_t *dbuf; dsl_dir_t *dd; dmu_object_info_t doi; int err; ASSERT(dsl_pool_config_held(dp)); err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); if (err != 0) return (err); dd = dmu_buf_get_user(dbuf); dmu_object_info_from_db(dbuf, &doi); ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); if (dd == NULL) { dsl_dir_t *winner; dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); dd->dd_object = ddobj; dd->dd_dbuf = dbuf; dd->dd_pool = dp; mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL); dsl_prop_init(dd); if (dsl_dir_is_zapified(dd)) { err = zap_lookup(dp->dp_meta_objset, ddobj, DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj); if (err == 0) { /* check for on-disk format errata */ if (dsl_dir_incompatible_encryption_version( dd)) { dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; } } else if (err != ENOENT) { goto errout; } } if (dsl_dir_phys(dd)->dd_parent_obj) { err = dsl_dir_hold_obj(dp, dsl_dir_phys(dd)->dd_parent_obj, NULL, dd, &dd->dd_parent); if (err != 0) goto errout; if (tail) { #ifdef ZFS_DEBUG uint64_t foundobj; err = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(dd->dd_parent)-> dd_child_dir_zapobj, tail, sizeof (foundobj), 1, &foundobj); ASSERT(err || foundobj == ddobj); #endif (void) strlcpy(dd->dd_myname, tail, sizeof (dd->dd_myname)); } else { err = zap_value_search(dp->dp_meta_objset, dsl_dir_phys(dd->dd_parent)-> dd_child_dir_zapobj, ddobj, 0, dd->dd_myname); } if (err != 0) goto errout; } else { (void) strlcpy(dd->dd_myname, spa_name(dp->dp_spa), sizeof (dd->dd_myname)); } if (dsl_dir_is_clone(dd)) { dmu_buf_t *origin_bonus; dsl_dataset_phys_t *origin_phys; /* * We can't open the origin dataset, because * that would require opening this dsl_dir. * Just look at its phys directly instead. */ err = dmu_bonus_hold(dp->dp_meta_objset, dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin_bonus); if (err != 0) goto errout; origin_phys = origin_bonus->db_data; dd->dd_origin_txg = origin_phys->ds_creation_txg; dmu_buf_rele(origin_bonus, FTAG); if (dsl_dir_is_zapified(dd)) { uint64_t obj; err = zap_lookup(dp->dp_meta_objset, dd->dd_object, DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj); if (err == 0) dsl_dir_livelist_open(dd, obj); else if (err != ENOENT) goto errout; } } - inode_timespec_t t = {0}; - zap_lookup(dd->dd_pool->dp_meta_objset, - dsl_dir_phys(dd)->dd_props_zapobj, - zfs_prop_to_name(ZFS_PROP_SNAPSHOTS_CHANGED), - sizeof (uint64_t), - sizeof (inode_timespec_t) / sizeof (uint64_t), &t); - dd->dd_snap_cmtime = t; + if (dsl_dir_is_zapified(dd)) { + inode_timespec_t t = {0}; + zap_lookup(dp->dp_meta_objset, ddobj, + zfs_prop_to_name(ZFS_PROP_SNAPSHOTS_CHANGED), + sizeof (uint64_t), + sizeof (inode_timespec_t) / sizeof (uint64_t), + &t); + dd->dd_snap_cmtime = t; + } dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async, &dd->dd_dbuf); winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu); if (winner != NULL) { if (dd->dd_parent) dsl_dir_rele(dd->dd_parent, dd); if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); cv_destroy(&dd->dd_activity_cv); mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dd = winner; } else { spa_open_ref(dp->dp_spa, dd); } } /* * The dsl_dir_t has both open-to-close and instantiate-to-evict * holds on the spa. We need the open-to-close holds because * otherwise the spa_refcnt wouldn't change when we open a * dir which the spa also has open, so we could incorrectly * think it was OK to unload/export/destroy the pool. We need * the instantiate-to-evict hold because the dsl_dir_t has a * pointer to the dd_pool, which has a pointer to the spa_t. */ spa_open_ref(dp->dp_spa, tag); ASSERT3P(dd->dd_pool, ==, dp); ASSERT3U(dd->dd_object, ==, ddobj); ASSERT3P(dd->dd_dbuf, ==, dbuf); *ddp = dd; return (0); errout: if (dd->dd_parent) dsl_dir_rele(dd->dd_parent, dd); if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); cv_destroy(&dd->dd_activity_cv); mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dmu_buf_rele(dbuf, tag); return (err); } void dsl_dir_rele(dsl_dir_t *dd, const void *tag) { dprintf_dd(dd, "%s\n", ""); spa_close(dd->dd_pool->dp_spa, tag); dmu_buf_rele(dd->dd_dbuf, tag); } /* * Remove a reference to the given dsl dir that is being asynchronously * released. Async releases occur from a taskq performing eviction of * dsl datasets and dirs. This process is identical to a normal release * with the exception of using the async API for releasing the reference on * the spa. */ void dsl_dir_async_rele(dsl_dir_t *dd, const void *tag) { dprintf_dd(dd, "%s\n", ""); spa_async_close(dd->dd_pool->dp_spa, tag); dmu_buf_rele(dd->dd_dbuf, tag); } /* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */ void dsl_dir_name(dsl_dir_t *dd, char *buf) { if (dd->dd_parent) { dsl_dir_name(dd->dd_parent, buf); VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); } else { buf[0] = '\0'; } if (!MUTEX_HELD(&dd->dd_lock)) { /* * recursive mutex so that we can use * dprintf_dd() with dd_lock held */ mutex_enter(&dd->dd_lock); VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); mutex_exit(&dd->dd_lock); } else { VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); } } /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */ int dsl_dir_namelen(dsl_dir_t *dd) { int result = 0; if (dd->dd_parent) { /* parent's name + 1 for the "/" */ result = dsl_dir_namelen(dd->dd_parent) + 1; } if (!MUTEX_HELD(&dd->dd_lock)) { /* see dsl_dir_name */ mutex_enter(&dd->dd_lock); result += strlen(dd->dd_myname); mutex_exit(&dd->dd_lock); } else { result += strlen(dd->dd_myname); } return (result); } static int getcomponent(const char *path, char *component, const char **nextp) { char *p; if ((path == NULL) || (path[0] == '\0')) return (SET_ERROR(ENOENT)); /* This would be a good place to reserve some namespace... */ p = strpbrk(path, "/@"); if (p && (p[1] == '/' || p[1] == '@')) { /* two separators in a row */ return (SET_ERROR(EINVAL)); } if (p == NULL || p == path) { /* * if the first thing is an @ or /, it had better be an * @ and it had better not have any more ats or slashes, * and it had better have something after the @. */ if (p != NULL && (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) return (SET_ERROR(EINVAL)); if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strlcpy(component, path, ZFS_MAX_DATASET_NAME_LEN); p = NULL; } else if (p[0] == '/') { if (p - path >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strncpy(component, path, p - path); component[p - path] = '\0'; p++; } else if (p[0] == '@') { /* * if the next separator is an @, there better not be * any more slashes. */ if (strchr(path, '/')) return (SET_ERROR(EINVAL)); if (p - path >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strncpy(component, path, p - path); component[p - path] = '\0'; } else { panic("invalid p=%p", (void *)p); } *nextp = p; return (0); } /* * Return the dsl_dir_t, and possibly the last component which couldn't * be found in *tail. The name must be in the specified dsl_pool_t. This * thread must hold the dp_config_rwlock for the pool. Returns NULL if the * path is bogus, or if tail==NULL and we couldn't parse the whole name. * (*tail)[0] == '@' means that the last component is a snapshot. */ int dsl_dir_hold(dsl_pool_t *dp, const char *name, const void *tag, dsl_dir_t **ddp, const char **tailp) { char *buf; const char *spaname, *next, *nextnext = NULL; int err; dsl_dir_t *dd; uint64_t ddobj; buf = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); err = getcomponent(name, buf, &next); if (err != 0) goto error; /* Make sure the name is in the specified pool. */ spaname = spa_name(dp->dp_spa); if (strcmp(buf, spaname) != 0) { err = SET_ERROR(EXDEV); goto error; } ASSERT(dsl_pool_config_held(dp)); err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); if (err != 0) { goto error; } while (next != NULL) { dsl_dir_t *child_dd; err = getcomponent(next, buf, &nextnext); if (err != 0) break; ASSERT(next[0] != '\0'); if (next[0] == '@') break; dprintf("looking up %s in obj%lld\n", buf, (longlong_t)dsl_dir_phys(dd)->dd_child_dir_zapobj); err = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(dd)->dd_child_dir_zapobj, buf, sizeof (ddobj), 1, &ddobj); if (err != 0) { if (err == ENOENT) err = 0; break; } err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd); if (err != 0) break; dsl_dir_rele(dd, tag); dd = child_dd; next = nextnext; } if (err != 0) { dsl_dir_rele(dd, tag); goto error; } /* * It's an error if there's more than one component left, or * tailp==NULL and there's any component left. */ if (next != NULL && (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { /* bad path name */ dsl_dir_rele(dd, tag); dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); err = SET_ERROR(ENOENT); } if (tailp != NULL) *tailp = next; if (err == 0) *ddp = dd; error: kmem_free(buf, ZFS_MAX_DATASET_NAME_LEN); return (err); } /* * If the counts are already initialized for this filesystem and its * descendants then do nothing, otherwise initialize the counts. * * The counts on this filesystem, and those below, may be uninitialized due to * either the use of a pre-existing pool which did not support the * filesystem/snapshot limit feature, or one in which the feature had not yet * been enabled. * * Recursively descend the filesystem tree and update the filesystem/snapshot * counts on each filesystem below, then update the cumulative count on the * current filesystem. If the filesystem already has a count set on it, * then we know that its counts, and the counts on the filesystems below it, * are already correct, so we don't have to update this filesystem. */ static void dsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx) { uint64_t my_fs_cnt = 0; uint64_t my_ss_cnt = 0; dsl_pool_t *dp = dd->dd_pool; objset_t *os = dp->dp_meta_objset; zap_cursor_t *zc; zap_attribute_t *za; dsl_dataset_t *ds; ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)); ASSERT(dsl_pool_config_held(dp)); ASSERT(dmu_tx_is_syncing(tx)); dsl_dir_zapify(dd, tx); /* * If the filesystem count has already been initialized then we * don't need to recurse down any further. */ if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0) return; zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* Iterate my child dirs */ for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { dsl_dir_t *chld_dd; uint64_t count; VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG, &chld_dd)); /* * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets. */ if (chld_dd->dd_myname[0] == '$') { dsl_dir_rele(chld_dd, FTAG); continue; } my_fs_cnt++; /* count this child */ dsl_dir_init_fs_ss_count(chld_dd, tx); VERIFY0(zap_lookup(os, chld_dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count)); my_fs_cnt += count; VERIFY0(zap_lookup(os, chld_dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count)); my_ss_cnt += count; dsl_dir_rele(chld_dd, FTAG); } zap_cursor_fini(zc); /* Count my snapshots (we counted children's snapshots above) */ VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds)); for (zap_cursor_init(zc, os, dsl_dataset_phys(ds)->ds_snapnames_zapobj); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { /* Don't count temporary snapshots */ if (za->za_name[0] != '%') my_ss_cnt++; } zap_cursor_fini(zc); dsl_dataset_rele(ds, FTAG); kmem_free(zc, sizeof (zap_cursor_t)); kmem_free(za, sizeof (zap_attribute_t)); /* we're in a sync task, update counts */ dmu_buf_will_dirty(dd->dd_dbuf, tx); VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (my_fs_cnt), 1, &my_fs_cnt, tx)); VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (my_ss_cnt), 1, &my_ss_cnt, tx)); } static int dsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx) { char *ddname = (char *)arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; dsl_dir_t *dd; int error; error = dsl_dataset_hold(dp, ddname, FTAG, &ds); if (error != 0) return (error); if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENOTSUP)); } dd = ds->ds_dir; if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) && dsl_dir_is_zapified(dd) && zap_contains(dp->dp_meta_objset, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EALREADY)); } dsl_dataset_rele(ds, FTAG); return (0); } static void dsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx) { char *ddname = (char *)arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; spa_t *spa; VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds)); spa = dsl_dataset_get_spa(ds); if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) { /* * Since the feature was not active and we're now setting a * limit, increment the feature-active counter so that the * feature becomes active for the first time. * * We are already in a sync task so we can update the MOS. */ spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx); } /* * Since we are now setting a non-UINT64_MAX limit on the filesystem, * we need to ensure the counts are correct. Descend down the tree from * this point and update all of the counts to be accurate. */ dsl_dir_init_fs_ss_count(ds->ds_dir, tx); dsl_dataset_rele(ds, FTAG); } /* * Make sure the feature is enabled and activate it if necessary. * Since we're setting a limit, ensure the on-disk counts are valid. * This is only called by the ioctl path when setting a limit value. * * We do not need to validate the new limit, since users who can change the * limit are also allowed to exceed the limit. */ int dsl_dir_activate_fs_ss_limit(const char *ddname) { int error; error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check, dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0, ZFS_SPACE_CHECK_RESERVED); if (error == EALREADY) error = 0; return (error); } /* * Used to determine if the filesystem_limit or snapshot_limit should be * enforced. We allow the limit to be exceeded if the user has permission to * write the property value. We pass in the creds that we got in the open * context since we will always be the GZ root in syncing context. We also have * to handle the case where we are allowed to change the limit on the current * dataset, but there may be another limit in the tree above. * * We can never modify these two properties within a non-global zone. In * addition, the other checks are modeled on zfs_secpolicy_write_perms. We * can't use that function since we are already holding the dp_config_rwlock. * In addition, we already have the dd and dealing with snapshots is simplified * in this code. */ typedef enum { ENFORCE_ALWAYS, ENFORCE_NEVER, ENFORCE_ABOVE } enforce_res_t; static enforce_res_t dsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr, proc_t *proc) { enforce_res_t enforce = ENFORCE_ALWAYS; uint64_t obj; dsl_dataset_t *ds; uint64_t zoned; const char *zonedstr; ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || prop == ZFS_PROP_SNAPSHOT_LIMIT); #ifdef _KERNEL if (crgetzoneid(cr) != GLOBAL_ZONEID) return (ENFORCE_ALWAYS); /* * We are checking the saved credentials of the user process, which is * not the current process. Note that we can't use secpolicy_zfs(), * because it only works if the cred is that of the current process (on * Linux). */ if (secpolicy_zfs_proc(cr, proc) == 0) return (ENFORCE_NEVER); #else (void) proc; #endif if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0) return (ENFORCE_ALWAYS); ASSERT(dsl_pool_config_held(dd->dd_pool)); if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0) return (ENFORCE_ALWAYS); zonedstr = zfs_prop_to_name(ZFS_PROP_ZONED); if (dsl_prop_get_ds(ds, zonedstr, 8, 1, &zoned, NULL) || zoned) { /* Only root can access zoned fs's from the GZ */ enforce = ENFORCE_ALWAYS; } else { if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0) enforce = ENFORCE_ABOVE; } dsl_dataset_rele(ds, FTAG); return (enforce); } /* * Check if adding additional child filesystem(s) would exceed any filesystem * limits or adding additional snapshot(s) would exceed any snapshot limits. * The prop argument indicates which limit to check. * * Note that all filesystem limits up to the root (or the highest * initialized) filesystem or the given ancestor must be satisfied. */ int dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop, dsl_dir_t *ancestor, cred_t *cr, proc_t *proc) { objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t limit, count; const char *count_prop; enforce_res_t enforce; int err = 0; ASSERT(dsl_pool_config_held(dd->dd_pool)); ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || prop == ZFS_PROP_SNAPSHOT_LIMIT); /* * If we're allowed to change the limit, don't enforce the limit * e.g. this can happen if a snapshot is taken by an administrative * user in the global zone (i.e. a recursive snapshot by root). * However, we must handle the case of delegated permissions where we * are allowed to change the limit on the current dataset, but there * is another limit in the tree above. */ enforce = dsl_enforce_ds_ss_limits(dd, prop, cr, proc); if (enforce == ENFORCE_NEVER) return (0); /* * e.g. if renaming a dataset with no snapshots, count adjustment * is 0. */ if (delta == 0) return (0); if (prop == ZFS_PROP_SNAPSHOT_LIMIT) { /* * We don't enforce the limit for temporary snapshots. This is * indicated by a NULL cred_t argument. */ if (cr == NULL) return (0); count_prop = DD_FIELD_SNAPSHOT_COUNT; } else { count_prop = DD_FIELD_FILESYSTEM_COUNT; } /* * If an ancestor has been provided, stop checking the limit once we * hit that dir. We need this during rename so that we don't overcount * the check once we recurse up to the common ancestor. */ if (ancestor == dd) return (0); /* * If we hit an uninitialized node while recursing up the tree, we can * stop since we know there is no limit here (or above). The counts are * not valid on this node and we know we won't touch this node's counts. */ if (!dsl_dir_is_zapified(dd)) return (0); err = zap_lookup(os, dd->dd_object, count_prop, sizeof (count), 1, &count); if (err == ENOENT) return (0); if (err != 0) return (err); err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL, B_FALSE); if (err != 0) return (err); /* Is there a limit which we've hit? */ if (enforce == ENFORCE_ALWAYS && (count + delta) > limit) return (SET_ERROR(EDQUOT)); if (dd->dd_parent != NULL) err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop, ancestor, cr, proc); return (err); } /* * Adjust the filesystem or snapshot count for the specified dsl_dir_t and all * parents. When a new filesystem/snapshot is created, increment the count on * all parents, and when a filesystem/snapshot is destroyed, decrement the * count. */ void dsl_fs_ss_count_adjust(dsl_dir_t *dd, int64_t delta, const char *prop, dmu_tx_t *tx) { int err; objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t count; ASSERT(dsl_pool_config_held(dd->dd_pool)); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0 || strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0); /* * We don't do accounting for hidden ($FREE, $MOS & $ORIGIN) objsets. */ if (dd->dd_myname[0] == '$' && strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0) { return; } /* * e.g. if renaming a dataset with no snapshots, count adjustment is 0 */ if (delta == 0) return; /* * If we hit an uninitialized node while recursing up the tree, we can * stop since we know the counts are not valid on this node and we * know we shouldn't touch this node's counts. An uninitialized count * on the node indicates that either the feature has not yet been * activated or there are no limits on this part of the tree. */ if (!dsl_dir_is_zapified(dd) || (err = zap_lookup(os, dd->dd_object, prop, sizeof (count), 1, &count)) == ENOENT) return; VERIFY0(err); count += delta; /* Use a signed verify to make sure we're not neg. */ VERIFY3S(count, >=, 0); VERIFY0(zap_update(os, dd->dd_object, prop, sizeof (count), 1, &count, tx)); /* Roll up this additional count into our ancestors */ if (dd->dd_parent != NULL) dsl_fs_ss_count_adjust(dd->dd_parent, delta, prop, tx); } uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; uint64_t ddobj; dsl_dir_phys_t *ddphys; dmu_buf_t *dbuf; ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); if (pds) { VERIFY0(zap_add(mos, dsl_dir_phys(pds)->dd_child_dir_zapobj, name, sizeof (uint64_t), 1, &ddobj, tx)); } else { /* it's the root dir */ VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); } VERIFY0(dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); ddphys = dbuf->db_data; ddphys->dd_creation_time = gethrestime_sec(); if (pds) { ddphys->dd_parent_obj = pds->dd_object; /* update the filesystem counts */ dsl_fs_ss_count_adjust(pds, 1, DD_FIELD_FILESYSTEM_COUNT, tx); } ddphys->dd_props_zapobj = zap_create(mos, DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); ddphys->dd_child_dir_zapobj = zap_create(mos, DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; dmu_buf_rele(dbuf, FTAG); return (ddobj); } boolean_t dsl_dir_is_clone(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_origin_obj && (dd->dd_pool->dp_origin_snap == NULL || dsl_dir_phys(dd)->dd_origin_obj != dd->dd_pool->dp_origin_snap->ds_object)); } uint64_t dsl_dir_get_used(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_bytes); } uint64_t dsl_dir_get_compressed(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_compressed_bytes); } uint64_t dsl_dir_get_quota(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_quota); } uint64_t dsl_dir_get_reservation(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_reserved); } uint64_t dsl_dir_get_compressratio(dsl_dir_t *dd) { /* a fixed point number, 100x the ratio */ return (dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 : (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 / dsl_dir_phys(dd)->dd_compressed_bytes)); } uint64_t dsl_dir_get_logicalused(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_uncompressed_bytes); } uint64_t dsl_dir_get_usedsnap(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]); } uint64_t dsl_dir_get_usedds(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]); } uint64_t dsl_dir_get_usedrefreserv(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]); } uint64_t dsl_dir_get_usedchild(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]); } void dsl_dir_get_origin(dsl_dir_t *dd, char *buf) { dsl_dataset_t *ds; VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds)); dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); } int dsl_dir_get_filesystem_count(dsl_dir_t *dd, uint64_t *count) { if (dsl_dir_is_zapified(dd)) { objset_t *os = dd->dd_pool->dp_meta_objset; return (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (*count), 1, count)); } else { return (SET_ERROR(ENOENT)); } } int dsl_dir_get_snapshot_count(dsl_dir_t *dd, uint64_t *count) { if (dsl_dir_is_zapified(dd)) { objset_t *os = dd->dd_pool->dp_meta_objset; return (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (*count), 1, count)); } else { return (SET_ERROR(ENOENT)); } } void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) { mutex_enter(&dd->dd_lock); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dsl_dir_get_quota(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, dsl_dir_get_reservation(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED, dsl_dir_get_logicalused(dd)); if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, dsl_dir_get_usedsnap(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, dsl_dir_get_usedds(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, dsl_dir_get_usedrefreserv(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, dsl_dir_get_usedchild(dd)); } mutex_exit(&dd->dd_lock); uint64_t count; if (dsl_dir_get_filesystem_count(dd, &count) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_FILESYSTEM_COUNT, count); } if (dsl_dir_get_snapshot_count(dd, &count) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT, count); } if (dsl_dir_is_clone(dd)) { char buf[ZFS_MAX_DATASET_NAME_LEN]; dsl_dir_get_origin(dd, buf); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); } } void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; ASSERT(dsl_dir_phys(dd)); if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(dd->dd_dbuf, dd); } } static int64_t parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) { uint64_t old_accounted = MAX(used, dsl_dir_phys(dd)->dd_reserved); uint64_t new_accounted = MAX(used + delta, dsl_dir_phys(dd)->dd_reserved); return (new_accounted - old_accounted); } void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); mutex_enter(&dd->dd_lock); ASSERT0(dd->dd_tempreserved[tx->tx_txg & TXG_MASK]); dprintf_dd(dd, "txg=%llu towrite=%lluK\n", (u_longlong_t)tx->tx_txg, (u_longlong_t)dd->dd_space_towrite[tx->tx_txg & TXG_MASK] / 1024); dd->dd_space_towrite[tx->tx_txg & TXG_MASK] = 0; mutex_exit(&dd->dd_lock); /* release the hold from dsl_dir_dirty */ dmu_buf_rele(dd->dd_dbuf, dd); } static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd) { uint64_t space = 0; ASSERT(MUTEX_HELD(&dd->dd_lock)); for (int i = 0; i < TXG_SIZE; i++) { space += dd->dd_space_towrite[i & TXG_MASK]; ASSERT3U(dd->dd_space_towrite[i & TXG_MASK], >=, 0); } return (space); } /* * How much space would dd have available if ancestor had delta applied * to it? If ondiskonly is set, we're only interested in what's * on-disk, not estimated pending changes. */ uint64_t dsl_dir_space_available(dsl_dir_t *dd, dsl_dir_t *ancestor, int64_t delta, int ondiskonly) { uint64_t parentspace, myspace, quota, used; /* * If there are no restrictions otherwise, assume we have * unlimited space available. */ quota = UINT64_MAX; parentspace = UINT64_MAX; if (dd->dd_parent != NULL) { parentspace = dsl_dir_space_available(dd->dd_parent, ancestor, delta, ondiskonly); } mutex_enter(&dd->dd_lock); if (dsl_dir_phys(dd)->dd_quota != 0) quota = dsl_dir_phys(dd)->dd_quota; used = dsl_dir_phys(dd)->dd_used_bytes; if (!ondiskonly) used += dsl_dir_space_towrite(dd); if (dd->dd_parent == NULL) { uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, ZFS_SPACE_CHECK_NORMAL); quota = MIN(quota, poolsize); } if (dsl_dir_phys(dd)->dd_reserved > used && parentspace != UINT64_MAX) { /* * We have some space reserved, in addition to what our * parent gave us. */ parentspace += dsl_dir_phys(dd)->dd_reserved - used; } if (dd == ancestor) { ASSERT(delta <= 0); ASSERT(used >= -delta); used += delta; if (parentspace != UINT64_MAX) parentspace -= delta; } if (used > quota) { /* over quota */ myspace = 0; } else { /* * the lesser of the space provided by our parent and * the space left in our quota */ myspace = MIN(parentspace, quota - used); } mutex_exit(&dd->dd_lock); return (myspace); } struct tempreserve { list_node_t tr_node; dsl_dir_t *tr_ds; uint64_t tr_size; }; static int dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, boolean_t ignorequota, list_t *tr_list, dmu_tx_t *tx, boolean_t first) { uint64_t txg; uint64_t quota; struct tempreserve *tr; int retval; uint64_t ref_rsrv; top_of_function: txg = tx->tx_txg; retval = EDQUOT; ref_rsrv = 0; ASSERT3U(txg, !=, 0); ASSERT3S(asize, >, 0); mutex_enter(&dd->dd_lock); /* * Check against the dsl_dir's quota. We don't add in the delta * when checking for over-quota because they get one free hit. */ uint64_t est_inflight = dsl_dir_space_towrite(dd); for (int i = 0; i < TXG_SIZE; i++) est_inflight += dd->dd_tempreserved[i]; uint64_t used_on_disk = dsl_dir_phys(dd)->dd_used_bytes; /* * On the first iteration, fetch the dataset's used-on-disk and * refreservation values. Also, if checkrefquota is set, test if * allocating this space would exceed the dataset's refquota. */ if (first && tx->tx_objset) { int error; dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; error = dsl_dataset_check_quota(ds, !netfree, asize, est_inflight, &used_on_disk, &ref_rsrv); if (error != 0) { mutex_exit(&dd->dd_lock); DMU_TX_STAT_BUMP(dmu_tx_quota); return (error); } } /* * If this transaction will result in a net free of space, * we want to let it through. */ if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0) quota = UINT64_MAX; else quota = dsl_dir_phys(dd)->dd_quota; /* * Adjust the quota against the actual pool size at the root * minus any outstanding deferred frees. * To ensure that it's possible to remove files from a full * pool without inducing transient overcommits, we throttle * netfree transactions against a quota that is slightly larger, * but still within the pool's allocation slop. In cases where * we're very close to full, this will allow a steady trickle of * removes to get through. */ if (dd->dd_parent == NULL) { uint64_t avail = dsl_pool_unreserved_space(dd->dd_pool, (netfree) ? ZFS_SPACE_CHECK_RESERVED : ZFS_SPACE_CHECK_NORMAL); if (avail < quota) { quota = avail; retval = SET_ERROR(ENOSPC); } } /* * If they are requesting more space, and our current estimate * is over quota, they get to try again unless the actual * on-disk is over quota and there are no pending changes * or deferred frees (which may free up space for us). */ if (used_on_disk + est_inflight >= quota) { if (est_inflight > 0 || used_on_disk < quota) { retval = SET_ERROR(ERESTART); } else { ASSERT3U(used_on_disk, >=, quota); if (retval == ENOSPC && (used_on_disk - quota) < dsl_pool_deferred_space(dd->dd_pool)) { retval = SET_ERROR(ERESTART); } } dprintf_dd(dd, "failing: used=%lluK inflight = %lluK " "quota=%lluK tr=%lluK err=%d\n", (u_longlong_t)used_on_disk>>10, (u_longlong_t)est_inflight>>10, (u_longlong_t)quota>>10, (u_longlong_t)asize>>10, retval); mutex_exit(&dd->dd_lock); DMU_TX_STAT_BUMP(dmu_tx_quota); return (retval); } /* We need to up our estimated delta before dropping dd_lock */ dd->dd_tempreserved[txg & TXG_MASK] += asize; uint64_t parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, asize - ref_rsrv); mutex_exit(&dd->dd_lock); tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_ds = dd; tr->tr_size = asize; list_insert_tail(tr_list, tr); /* see if it's OK with our parent */ if (dd->dd_parent != NULL && parent_rsrv != 0) { /* * Recurse on our parent without recursion. This has been * observed to be potentially large stack usage even within * the test suite. Largest seen stack was 7632 bytes on linux. */ dd = dd->dd_parent; asize = parent_rsrv; ignorequota = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0); first = B_FALSE; goto top_of_function; } else { return (0); } } /* * Reserve space in this dsl_dir, to be used in this tx's txg. * After the space has been dirtied (and dsl_dir_willuse_space() * has been called), the reservation should be canceled, using * dsl_dir_tempreserve_clear(). */ int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx) { int err; list_t *tr_list; if (asize == 0) { *tr_cookiep = NULL; return (0); } tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); list_create(tr_list, sizeof (struct tempreserve), offsetof(struct tempreserve, tr_node)); ASSERT3S(asize, >, 0); err = arc_tempreserve_space(dd->dd_pool->dp_spa, lsize, tx->tx_txg); if (err == 0) { struct tempreserve *tr; tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_size = lsize; list_insert_tail(tr_list, tr); } else { if (err == EAGAIN) { /* * If arc_memory_throttle() detected that pageout * is running and we are low on memory, we delay new * non-pageout transactions to give pageout an * advantage. * * It is unfortunate to be delaying while the caller's * locks are held. */ txg_delay(dd->dd_pool, tx->tx_txg, MSEC2NSEC(10), MSEC2NSEC(10)); err = SET_ERROR(ERESTART); } } if (err == 0) { err = dsl_dir_tempreserve_impl(dd, asize, netfree, B_FALSE, tr_list, tx, B_TRUE); } if (err != 0) dsl_dir_tempreserve_clear(tr_list, tx); else *tr_cookiep = tr_list; return (err); } /* * Clear a temporary reservation that we previously made with * dsl_dir_tempreserve_space(). */ void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) { int txgidx = tx->tx_txg & TXG_MASK; list_t *tr_list = tr_cookie; struct tempreserve *tr; ASSERT3U(tx->tx_txg, !=, 0); if (tr_cookie == NULL) return; while ((tr = list_head(tr_list)) != NULL) { if (tr->tr_ds) { mutex_enter(&tr->tr_ds->dd_lock); ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, tr->tr_size); tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; mutex_exit(&tr->tr_ds->dd_lock); } else { arc_tempreserve_clear(tr->tr_size); } list_remove(tr_list, tr); kmem_free(tr, sizeof (struct tempreserve)); } kmem_free(tr_list, sizeof (list_t)); } /* * This should be called from open context when we think we're going to write * or free space, for example when dirtying data. Be conservative; it's okay * to write less space or free more, but we don't want to write more or free * less than the amount specified. * * NOTE: The behavior of this function is identical to the Illumos / FreeBSD * version however it has been adjusted to use an iterative rather than * recursive algorithm to minimize stack usage. */ void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) { int64_t parent_space; uint64_t est_used; do { mutex_enter(&dd->dd_lock); if (space > 0) dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; est_used = dsl_dir_space_towrite(dd) + dsl_dir_phys(dd)->dd_used_bytes; parent_space = parent_delta(dd, est_used, space); mutex_exit(&dd->dd_lock); /* Make sure that we clean up dd_space_to* */ dsl_dir_dirty(dd, tx); dd = dd->dd_parent; space = parent_space; } while (space && dd); } /* call from syncing context when we actually write/free space for this dd */ void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) { int64_t accounted_delta; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(type < DD_USED_NUM); dmu_buf_will_dirty(dd->dd_dbuf, tx); /* * dsl_dataset_set_refreservation_sync_impl() calls this with * dd_lock held, so that it can atomically update * ds->ds_reserved and the dsl_dir accounting, so that * dsl_dataset_check_quota() can see dataset and dir accounting * consistently. */ boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); if (needlock) mutex_enter(&dd->dd_lock); dsl_dir_phys_t *ddp = dsl_dir_phys(dd); accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used); ASSERT(used >= 0 || ddp->dd_used_bytes >= -used); ASSERT(compressed >= 0 || ddp->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || ddp->dd_uncompressed_bytes >= -uncompressed); ddp->dd_used_bytes += used; ddp->dd_uncompressed_bytes += uncompressed; ddp->dd_compressed_bytes += compressed; if (ddp->dd_flags & DD_FLAG_USED_BREAKDOWN) { ASSERT(used >= 0 || ddp->dd_used_breakdown[type] >= -used); ddp->dd_used_breakdown[type] += used; #ifdef ZFS_DEBUG { dd_used_t t; uint64_t u = 0; for (t = 0; t < DD_USED_NUM; t++) u += ddp->dd_used_breakdown[t]; ASSERT3U(u, ==, ddp->dd_used_bytes); } #endif } if (needlock) mutex_exit(&dd->dd_lock); if (dd->dd_parent != NULL) { dsl_dir_diduse_transfer_space(dd->dd_parent, accounted_delta, compressed, uncompressed, used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); } } void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(oldtype < DD_USED_NUM); ASSERT(newtype < DD_USED_NUM); dsl_dir_phys_t *ddp = dsl_dir_phys(dd); if (delta == 0 || !(ddp->dd_flags & DD_FLAG_USED_BREAKDOWN)) return; dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); ASSERT(delta > 0 ? ddp->dd_used_breakdown[oldtype] >= delta : ddp->dd_used_breakdown[newtype] >= -delta); ASSERT(ddp->dd_used_bytes >= ABS(delta)); ddp->dd_used_breakdown[oldtype] -= delta; ddp->dd_used_breakdown[newtype] += delta; mutex_exit(&dd->dd_lock); } void dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used, int64_t compressed, int64_t uncompressed, int64_t tonew, dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) { int64_t accounted_delta; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(oldtype < DD_USED_NUM); ASSERT(newtype < DD_USED_NUM); dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); dsl_dir_phys_t *ddp = dsl_dir_phys(dd); accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used); ASSERT(used >= 0 || ddp->dd_used_bytes >= -used); ASSERT(compressed >= 0 || ddp->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || ddp->dd_uncompressed_bytes >= -uncompressed); ddp->dd_used_bytes += used; ddp->dd_uncompressed_bytes += uncompressed; ddp->dd_compressed_bytes += compressed; if (ddp->dd_flags & DD_FLAG_USED_BREAKDOWN) { ASSERT(tonew - used <= 0 || ddp->dd_used_breakdown[oldtype] >= tonew - used); ASSERT(tonew >= 0 || ddp->dd_used_breakdown[newtype] >= -tonew); ddp->dd_used_breakdown[oldtype] -= tonew - used; ddp->dd_used_breakdown[newtype] += tonew; #ifdef ZFS_DEBUG { dd_used_t t; uint64_t u = 0; for (t = 0; t < DD_USED_NUM; t++) u += ddp->dd_used_breakdown[t]; ASSERT3U(u, ==, ddp->dd_used_bytes); } #endif } mutex_exit(&dd->dd_lock); if (dd->dd_parent != NULL) { dsl_dir_diduse_transfer_space(dd->dd_parent, accounted_delta, compressed, uncompressed, used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); } } typedef struct dsl_dir_set_qr_arg { const char *ddsqra_name; zprop_source_t ddsqra_source; uint64_t ddsqra_value; } dsl_dir_set_qr_arg_t; static int dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx) { dsl_dir_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error; uint64_t towrite, newval; error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); error = dsl_prop_predict(ds->ds_dir, "quota", ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } if (newval == 0) { dsl_dataset_rele(ds, FTAG); return (0); } mutex_enter(&ds->ds_dir->dd_lock); /* * If we are doing the preliminary check in open context, and * there are pending changes, then don't fail it, since the * pending changes could under-estimate the amount of space to be * freed up. */ towrite = dsl_dir_space_towrite(ds->ds_dir); if ((dmu_tx_is_syncing(tx) || towrite == 0) && (newval < dsl_dir_phys(ds->ds_dir)->dd_reserved || newval < dsl_dir_phys(ds->ds_dir)->dd_used_bytes + towrite)) { error = SET_ERROR(ENOSPC); } mutex_exit(&ds->ds_dir->dd_lock); dsl_dataset_rele(ds, FTAG); return (error); } static void dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx) { dsl_dir_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; uint64_t newval; VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) { dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA), ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, &ddsqra->ddsqra_value, tx); VERIFY0(dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_QUOTA), &newval)); } else { newval = ddsqra->ddsqra_value; spa_history_log_internal_ds(ds, "set", tx, "%s=%lld", zfs_prop_to_name(ZFS_PROP_QUOTA), (longlong_t)newval); } dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); mutex_enter(&ds->ds_dir->dd_lock); dsl_dir_phys(ds->ds_dir)->dd_quota = newval; mutex_exit(&ds->ds_dir->dd_lock); dsl_dataset_rele(ds, FTAG); } int dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) { dsl_dir_set_qr_arg_t ddsqra; ddsqra.ddsqra_name = ddname; ddsqra.ddsqra_source = source; ddsqra.ddsqra_value = quota; return (dsl_sync_task(ddname, dsl_dir_set_quota_check, dsl_dir_set_quota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } static int dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx) { dsl_dir_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; dsl_dir_t *dd; uint64_t newval, used, avail; int error; error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); dd = ds->ds_dir; /* * If we are doing the preliminary check in open context, the * space estimates may be inaccurate. */ if (!dmu_tx_is_syncing(tx)) { dsl_dataset_rele(ds, FTAG); return (0); } error = dsl_prop_predict(ds->ds_dir, zfs_prop_to_name(ZFS_PROP_RESERVATION), ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } mutex_enter(&dd->dd_lock); used = dsl_dir_phys(dd)->dd_used_bytes; mutex_exit(&dd->dd_lock); if (dd->dd_parent) { avail = dsl_dir_space_available(dd->dd_parent, NULL, 0, FALSE); } else { avail = dsl_pool_adjustedsize(dd->dd_pool, ZFS_SPACE_CHECK_NORMAL) - used; } if (MAX(used, newval) > MAX(used, dsl_dir_phys(dd)->dd_reserved)) { uint64_t delta = MAX(used, newval) - MAX(used, dsl_dir_phys(dd)->dd_reserved); if (delta > avail || (dsl_dir_phys(dd)->dd_quota > 0 && newval > dsl_dir_phys(dd)->dd_quota)) error = SET_ERROR(ENOSPC); } dsl_dataset_rele(ds, FTAG); return (error); } void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) { uint64_t used; int64_t delta; dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); used = dsl_dir_phys(dd)->dd_used_bytes; delta = MAX(used, value) - MAX(used, dsl_dir_phys(dd)->dd_reserved); dsl_dir_phys(dd)->dd_reserved = value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, delta, 0, 0, tx); } mutex_exit(&dd->dd_lock); } static void dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx) { dsl_dir_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; uint64_t newval; VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) { dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION), ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, &ddsqra->ddsqra_value, tx); VERIFY0(dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval)); } else { newval = ddsqra->ddsqra_value; spa_history_log_internal_ds(ds, "set", tx, "%s=%lld", zfs_prop_to_name(ZFS_PROP_RESERVATION), (longlong_t)newval); } dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx); dsl_dataset_rele(ds, FTAG); } int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation) { dsl_dir_set_qr_arg_t ddsqra; ddsqra.ddsqra_name = ddname; ddsqra.ddsqra_source = source; ddsqra.ddsqra_value = reservation; return (dsl_sync_task(ddname, dsl_dir_set_reservation_check, dsl_dir_set_reservation_sync, &ddsqra, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } static dsl_dir_t * closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) { for (; ds1; ds1 = ds1->dd_parent) { dsl_dir_t *dd; for (dd = ds2; dd; dd = dd->dd_parent) { if (ds1 == dd) return (dd); } } return (NULL); } /* * If delta is applied to dd, how much of that delta would be applied to * ancestor? Syncing context only. */ static int64_t would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) { if (dd == ancestor) return (delta); mutex_enter(&dd->dd_lock); delta = parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, delta); mutex_exit(&dd->dd_lock); return (would_change(dd->dd_parent, delta, ancestor)); } typedef struct dsl_dir_rename_arg { const char *ddra_oldname; const char *ddra_newname; cred_t *ddra_cred; proc_t *ddra_proc; } dsl_dir_rename_arg_t; typedef struct dsl_valid_rename_arg { int char_delta; int nest_delta; } dsl_valid_rename_arg_t; static int dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { (void) dp; dsl_valid_rename_arg_t *dvra = arg; char namebuf[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds, namebuf); ASSERT3U(strnlen(namebuf, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); int namelen = strlen(namebuf) + dvra->char_delta; int depth = get_dataset_depth(namebuf) + dvra->nest_delta; if (namelen >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); if (dvra->nest_delta > 0 && depth >= zfs_max_dataset_nesting) return (SET_ERROR(ENAMETOOLONG)); return (0); } static int dsl_dir_rename_check(void *arg, dmu_tx_t *tx) { dsl_dir_rename_arg_t *ddra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dir_t *dd, *newparent; dsl_valid_rename_arg_t dvra; dsl_dataset_t *parentds; objset_t *parentos; const char *mynewname; int error; /* target dir should exist */ error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL); if (error != 0) return (error); /* new parent should exist */ error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent, &mynewname); if (error != 0) { dsl_dir_rele(dd, FTAG); return (error); } /* can't rename to different pool */ if (dd->dd_pool != newparent->dd_pool) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(EXDEV)); } /* new name should not already exist */ if (mynewname == NULL) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(EEXIST)); } /* can't rename below anything but filesystems (eg. no ZVOLs) */ error = dsl_dataset_hold_obj(newparent->dd_pool, dsl_dir_phys(newparent)->dd_head_dataset_obj, FTAG, &parentds); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (error); } error = dmu_objset_from_ds(parentds, &parentos); if (error != 0) { dsl_dataset_rele(parentds, FTAG); dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (error); } if (dmu_objset_type(parentos) != DMU_OST_ZFS) { dsl_dataset_rele(parentds, FTAG); dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(ZFS_ERR_WRONG_PARENT)); } dsl_dataset_rele(parentds, FTAG); ASSERT3U(strnlen(ddra->ddra_newname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); ASSERT3U(strnlen(ddra->ddra_oldname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); dvra.char_delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname); dvra.nest_delta = get_dataset_depth(ddra->ddra_newname) - get_dataset_depth(ddra->ddra_oldname); /* if the name length is growing, validate child name lengths */ if (dvra.char_delta > 0 || dvra.nest_delta > 0) { error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename, &dvra, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (error); } } if (dmu_tx_is_syncing(tx)) { if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { /* * Although this is the check function and we don't * normally make on-disk changes in check functions, * we need to do that here. * * Ensure this portion of the tree's counts have been * initialized in case the new parent has limits set. */ dsl_dir_init_fs_ss_count(dd, tx); } } if (newparent != dd->dd_parent) { /* is there enough space? */ uint64_t myspace = MAX(dsl_dir_phys(dd)->dd_used_bytes, dsl_dir_phys(dd)->dd_reserved); objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t fs_cnt = 0; uint64_t ss_cnt = 0; if (dsl_dir_is_zapified(dd)) { int err; err = zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, &fs_cnt); if (err != ENOENT && err != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (err); } /* * have to add 1 for the filesystem itself that we're * moving */ fs_cnt++; err = zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, &ss_cnt); if (err != ENOENT && err != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (err); } } /* check for encryption errors */ error = dsl_dir_rename_crypt_check(dd, newparent); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(EACCES)); } /* no rename into our descendant */ if (closest_common_ancestor(dd, newparent) == dd) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_dir_transfer_possible(dd->dd_parent, newparent, fs_cnt, ss_cnt, myspace, ddra->ddra_cred, ddra->ddra_proc); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (error); } } dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (0); } static void dsl_dir_rename_sync(void *arg, dmu_tx_t *tx) { dsl_dir_rename_arg_t *ddra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dir_t *dd, *newparent; const char *mynewname; objset_t *mos = dp->dp_meta_objset; VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL)); VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent, &mynewname)); /* Log this before we change the name. */ spa_history_log_internal_dd(dd, "rename", tx, "-> %s", ddra->ddra_newname); if (newparent != dd->dd_parent) { objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t fs_cnt = 0; uint64_t ss_cnt = 0; /* * We already made sure the dd counts were initialized in the * check function. */ if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { VERIFY0(zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, &fs_cnt)); /* add 1 for the filesystem itself that we're moving */ fs_cnt++; VERIFY0(zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, &ss_cnt)); } dsl_fs_ss_count_adjust(dd->dd_parent, -fs_cnt, DD_FIELD_FILESYSTEM_COUNT, tx); dsl_fs_ss_count_adjust(newparent, fs_cnt, DD_FIELD_FILESYSTEM_COUNT, tx); dsl_fs_ss_count_adjust(dd->dd_parent, -ss_cnt, DD_FIELD_SNAPSHOT_COUNT, tx); dsl_fs_ss_count_adjust(newparent, ss_cnt, DD_FIELD_SNAPSHOT_COUNT, tx); dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dsl_dir_phys(dd)->dd_used_bytes, -dsl_dir_phys(dd)->dd_compressed_bytes, -dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); dsl_dir_diduse_space(newparent, DD_USED_CHILD, dsl_dir_phys(dd)->dd_used_bytes, dsl_dir_phys(dd)->dd_compressed_bytes, dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); if (dsl_dir_phys(dd)->dd_reserved > dsl_dir_phys(dd)->dd_used_bytes) { uint64_t unused_rsrv = dsl_dir_phys(dd)->dd_reserved - dsl_dir_phys(dd)->dd_used_bytes; dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, -unused_rsrv, 0, 0, tx); dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV, unused_rsrv, 0, 0, tx); } } dmu_buf_will_dirty(dd->dd_dbuf, tx); /* remove from old parent zapobj */ VERIFY0(zap_remove(mos, dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj, dd->dd_myname, tx)); (void) strlcpy(dd->dd_myname, mynewname, sizeof (dd->dd_myname)); dsl_dir_rele(dd->dd_parent, dd); dsl_dir_phys(dd)->dd_parent_obj = newparent->dd_object; VERIFY0(dsl_dir_hold_obj(dp, newparent->dd_object, NULL, dd, &dd->dd_parent)); /* add to new parent zapobj */ VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj, dd->dd_myname, 8, 1, &dd->dd_object, tx)); /* TODO: A rename callback to avoid these layering violations. */ zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname); zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname, ddra->ddra_newname, B_TRUE); dsl_prop_notify_all(dd); dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); } int dsl_dir_rename(const char *oldname, const char *newname) { dsl_dir_rename_arg_t ddra; ddra.ddra_oldname = oldname; ddra.ddra_newname = newname; ddra.ddra_cred = CRED(); ddra.ddra_proc = curproc; return (dsl_sync_task(oldname, dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3, ZFS_SPACE_CHECK_RESERVED)); } int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *cr, proc_t *proc) { dsl_dir_t *ancestor; int64_t adelta; uint64_t avail; int err; ancestor = closest_common_ancestor(sdd, tdd); adelta = would_change(sdd, -space, ancestor); avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); if (avail < space) return (SET_ERROR(ENOSPC)); err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT, ancestor, cr, proc); if (err != 0) return (err); err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT, ancestor, cr, proc); if (err != 0) return (err); return (0); } inode_timespec_t dsl_dir_snap_cmtime(dsl_dir_t *dd) { inode_timespec_t t; mutex_enter(&dd->dd_lock); t = dd->dd_snap_cmtime; mutex_exit(&dd->dd_lock); return (t); } void dsl_dir_snap_cmtime_update(dsl_dir_t *dd, dmu_tx_t *tx) { + dsl_pool_t *dp = dmu_tx_pool(tx); inode_timespec_t t; - objset_t *mos = dd->dd_pool->dp_meta_objset; - uint64_t zapobj = dsl_dir_phys(dd)->dd_props_zapobj; - const char *prop_name = zfs_prop_to_name(ZFS_PROP_SNAPSHOTS_CHANGED); - gethrestime(&t); + mutex_enter(&dd->dd_lock); dd->dd_snap_cmtime = t; - VERIFY0(zap_update(mos, zapobj, prop_name, sizeof (uint64_t), - sizeof (inode_timespec_t) / sizeof (uint64_t), &t, tx)); + if (spa_feature_is_enabled(dp->dp_spa, + SPA_FEATURE_EXTENSIBLE_DATASET)) { + objset_t *mos = dd->dd_pool->dp_meta_objset; + uint64_t ddobj = dd->dd_object; + dsl_dir_zapify(dd, tx); + VERIFY0(zap_update(mos, ddobj, + zfs_prop_to_name(ZFS_PROP_SNAPSHOTS_CHANGED), + sizeof (uint64_t), + sizeof (inode_timespec_t) / sizeof (uint64_t), + &t, tx)); + } mutex_exit(&dd->dd_lock); } void dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx) { objset_t *mos = dd->dd_pool->dp_meta_objset; dmu_object_zapify(mos, dd->dd_object, DMU_OT_DSL_DIR, tx); } boolean_t dsl_dir_is_zapified(dsl_dir_t *dd) { dmu_object_info_t doi; dmu_object_info_from_db(dd->dd_dbuf, &doi); return (doi.doi_type == DMU_OTN_ZAP_METADATA); } void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj) { objset_t *mos = dd->dd_pool->dp_meta_objset; ASSERT(spa_feature_is_active(dd->dd_pool->dp_spa, SPA_FEATURE_LIVELIST)); dsl_deadlist_open(&dd->dd_livelist, mos, obj); bplist_create(&dd->dd_pending_allocs); bplist_create(&dd->dd_pending_frees); } void dsl_dir_livelist_close(dsl_dir_t *dd) { dsl_deadlist_close(&dd->dd_livelist); bplist_destroy(&dd->dd_pending_allocs); bplist_destroy(&dd->dd_pending_frees); } void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total) { uint64_t obj; dsl_pool_t *dp = dmu_tx_pool(tx); spa_t *spa = dp->dp_spa; livelist_condense_entry_t to_condense = spa->spa_to_condense; if (!dsl_deadlist_is_open(&dd->dd_livelist)) return; /* * If the livelist being removed is set to be condensed, stop the * condense zthr and indicate the cancellation in the spa_to_condense * struct in case the condense no-wait synctask has already started */ zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr; if (ll_condense_thread != NULL && (to_condense.ds != NULL) && (to_condense.ds->ds_dir == dd)) { /* * We use zthr_wait_cycle_done instead of zthr_cancel * because we don't want to destroy the zthr, just have * it skip its current task. */ spa->spa_to_condense.cancelled = B_TRUE; zthr_wait_cycle_done(ll_condense_thread); /* * If we've returned from zthr_wait_cycle_done without * clearing the to_condense data structure it's either * because the no-wait synctask has started (which is * indicated by 'syncing' field of to_condense) and we * can expect it to clear to_condense on its own. * Otherwise, we returned before the zthr ran. The * checkfunc will now fail as cancelled == B_TRUE so we * can safely NULL out ds, allowing a different dir's * livelist to be condensed. * * We can be sure that the to_condense struct will not * be repopulated at this stage because both this * function and dsl_livelist_try_condense execute in * syncing context. */ if ((spa->spa_to_condense.ds != NULL) && !spa->spa_to_condense.syncing) { dmu_buf_rele(spa->spa_to_condense.ds->ds_dbuf, spa); spa->spa_to_condense.ds = NULL; } } dsl_dir_livelist_close(dd); VERIFY0(zap_lookup(dp->dp_meta_objset, dd->dd_object, DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj)); VERIFY0(zap_remove(dp->dp_meta_objset, dd->dd_object, DD_FIELD_LIVELIST, tx)); if (total) { dsl_deadlist_free(dp->dp_meta_objset, obj, tx); spa_feature_decr(spa, SPA_FEATURE_LIVELIST, tx); } } static int dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity, boolean_t *in_progress) { int error = 0; ASSERT(MUTEX_HELD(&dd->dd_activity_lock)); switch (activity) { case ZFS_WAIT_DELETEQ: { #ifdef _KERNEL objset_t *os; error = dmu_objset_from_ds(ds, &os); if (error != 0) break; mutex_enter(&os->os_user_ptr_lock); void *user = dmu_objset_get_user(os); mutex_exit(&os->os_user_ptr_lock); if (dmu_objset_type(os) != DMU_OST_ZFS || user == NULL || zfs_get_vfs_flag_unmounted(os)) { *in_progress = B_FALSE; return (0); } uint64_t readonly = B_FALSE; error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly, NULL); if (error != 0) break; if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) { *in_progress = B_FALSE; return (0); } uint64_t count, unlinked_obj; error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, &unlinked_obj); if (error != 0) { dsl_dataset_rele(ds, FTAG); break; } error = zap_count(os, unlinked_obj, &count); if (error == 0) *in_progress = (count != 0); break; #else /* * The delete queue is ZPL specific, and libzpool doesn't have * it. It doesn't make sense to wait for it. */ (void) ds; *in_progress = B_FALSE; break; #endif } default: panic("unrecognized value for activity %d", activity); } return (error); } int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity, boolean_t *waited) { int error = 0; boolean_t in_progress; dsl_pool_t *dp = dd->dd_pool; for (;;) { dsl_pool_config_enter(dp, FTAG); error = dsl_dir_activity_in_progress(dd, ds, activity, &in_progress); dsl_pool_config_exit(dp, FTAG); if (error != 0 || !in_progress) break; *waited = B_TRUE; if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) == 0 || dd->dd_activity_cancelled) { error = SET_ERROR(EINTR); break; } } return (error); } void dsl_dir_cancel_waiters(dsl_dir_t *dd) { mutex_enter(&dd->dd_activity_lock); dd->dd_activity_cancelled = B_TRUE; cv_broadcast(&dd->dd_activity_cv); while (dd->dd_activity_waiters > 0) cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock); mutex_exit(&dd->dd_activity_lock); } #if defined(_KERNEL) EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_reservation); #endif diff --git a/sys/contrib/openzfs/module/zfs/spa_errlog.c b/sys/contrib/openzfs/module/zfs/spa_errlog.c index 4572a6e56f0b..e682f6c69402 100644 --- a/sys/contrib/openzfs/module/zfs/spa_errlog.c +++ b/sys/contrib/openzfs/module/zfs/spa_errlog.c @@ -1,1302 +1,1373 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2014, Delphix. All rights reserved. - * Copyright (c) 2021, George Amanakis. All rights reserved. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2021, 2022, George Amanakis. All rights reserved. */ /* * Routines to manage the on-disk persistent error log. * * Each pool stores a log of all logical data errors seen during normal * operation. This is actually the union of two distinct logs: the last log, * and the current log. All errors seen are logged to the current log. When a * scrub completes, the current log becomes the last log, the last log is thrown * out, and the current log is reinitialized. This way, if an error is somehow * corrected, a new scrub will show that it no longer exists, and will be * deleted from the log when the scrub completes. * * The log is stored using a ZAP object whose key is a string form of the * zbookmark_phys tuple (objset, object, level, blkid), and whose contents is an * optional 'objset:object' human-readable string describing the data. When an * error is first logged, this string will be empty, indicating that no name is * known. This prevents us from having to issue a potentially large amount of * I/O to discover the object name during an error path. Instead, we do the * calculation when the data is requested, storing the result so future queries * will be faster. * * If the head_errlog feature is enabled, a different on-disk format is used. * The error log of each head dataset is stored separately in the zap object * and keyed by the head id. This enables listing every dataset affected in * userland. In order to be able to track whether an error block has been * modified or added to snapshots since it was marked as an error, a new tuple * is introduced: zbookmark_err_phys_t. It allows the storage of the birth * transaction group of an error block on-disk. The birth transaction group is * used by check_filesystem() to assess whether this block was freed, * re-written or added to a snapshot since its marking as an error. * * This log is then shipped into an nvlist where the key is the dataset name and * the value is the object name. Userland is then responsible for uniquifying * this list and displaying it to the user. */ #include #include #include #include #include #include #include #include +#include #define NAME_MAX_LEN 64 /* * spa_upgrade_errlog_limit : A zfs module parameter that controls the number * of on-disk error log entries that will be converted to the new * format when enabling head_errlog. Defaults to 0 which converts * all log entries. */ static uint32_t spa_upgrade_errlog_limit = 0; /* * Convert a bookmark to a string. */ static void bookmark_to_name(zbookmark_phys_t *zb, char *buf, size_t len) { (void) snprintf(buf, len, "%llx:%llx:%llx:%llx", (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid); } /* * Convert an err_phys to a string. */ static void errphys_to_name(zbookmark_err_phys_t *zep, char *buf, size_t len) { (void) snprintf(buf, len, "%llx:%llx:%llx:%llx", (u_longlong_t)zep->zb_object, (u_longlong_t)zep->zb_level, (u_longlong_t)zep->zb_blkid, (u_longlong_t)zep->zb_birth); } /* * Convert a string to a err_phys. */ static void name_to_errphys(char *buf, zbookmark_err_phys_t *zep) { zep->zb_object = zfs_strtonum(buf, &buf); ASSERT(*buf == ':'); zep->zb_level = (int)zfs_strtonum(buf + 1, &buf); ASSERT(*buf == ':'); zep->zb_blkid = zfs_strtonum(buf + 1, &buf); ASSERT(*buf == ':'); zep->zb_birth = zfs_strtonum(buf + 1, &buf); ASSERT(*buf == '\0'); } /* * Convert a string to a bookmark. */ static void name_to_bookmark(char *buf, zbookmark_phys_t *zb) { zb->zb_objset = zfs_strtonum(buf, &buf); ASSERT(*buf == ':'); zb->zb_object = zfs_strtonum(buf + 1, &buf); ASSERT(*buf == ':'); zb->zb_level = (int)zfs_strtonum(buf + 1, &buf); ASSERT(*buf == ':'); zb->zb_blkid = zfs_strtonum(buf + 1, &buf); ASSERT(*buf == '\0'); } #ifdef _KERNEL static void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep, zbookmark_phys_t *zb) { zb->zb_objset = dataset; zb->zb_object = zep->zb_object; zb->zb_level = zep->zb_level; zb->zb_blkid = zep->zb_blkid; } #endif static void name_to_object(char *buf, uint64_t *obj) { *obj = zfs_strtonum(buf, &buf); ASSERT(*buf == '\0'); } static int get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, uint64_t *head_dataset_id) { dsl_pool_t *dp = spa->spa_dsl_pool; dsl_dataset_t *ds; objset_t *os; dsl_pool_config_enter(dp, FTAG); int error = dsl_dataset_hold_obj(dp, ds_obj, FTAG, &ds); if (error != 0) { dsl_pool_config_exit(dp, FTAG); return (error); } ASSERT(head_dataset_id); *head_dataset_id = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj; error = dmu_objset_from_ds(ds, &os); if (error != 0) { dsl_dataset_rele(ds, FTAG); dsl_pool_config_exit(dp, FTAG); return (error); } + /* + * If the key is not loaded dbuf_dnode_findbp() will error out with + * EACCES. However in that case dnode_hold() will eventually call + * dbuf_read()->zio_wait() which may call spa_log_error(). This will + * lead to a deadlock due to us holding the mutex spa_errlist_lock. + * Avoid this by checking here if the keys are loaded, if not return. + * If the keys are not loaded the head_errlog feature is meaningless + * as we cannot figure out the birth txg of the block pointer. + */ + if (dsl_dataset_get_keystatus(ds->ds_dir) == + ZFS_KEYSTATUS_UNAVAILABLE) { + zep->zb_birth = 0; + dsl_dataset_rele(ds, FTAG); + dsl_pool_config_exit(dp, FTAG); + return (0); + } + dnode_t *dn; blkptr_t bp; error = dnode_hold(os, zep->zb_object, FTAG, &dn); if (error != 0) { dsl_dataset_rele(ds, FTAG); dsl_pool_config_exit(dp, FTAG); return (error); } rw_enter(&dn->dn_struct_rwlock, RW_READER); error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL, NULL); - if (error == 0 && BP_IS_HOLE(&bp)) error = SET_ERROR(ENOENT); - zep->zb_birth = bp.blk_birth; + /* + * If the key is loaded but the encrypted filesystem is unmounted when + * a scrub is run, then dbuf_dnode_findbp() will still error out with + * EACCES (possibly due to the key mapping being removed upon + * unmounting). In that case the head_errlog feature is also + * meaningless as we cannot figure out the birth txg of the block + * pointer. + */ + if (error == EACCES) + error = 0; + else if (!error) + zep->zb_birth = bp.blk_birth; + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); dsl_dataset_rele(ds, FTAG); dsl_pool_config_exit(dp, FTAG); return (error); } /* * Log an uncorrectable error to the persistent error log. We add it to the * spa's list of pending errors. The changes are actually synced out to disk * during spa_errlog_sync(). */ void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb) { spa_error_entry_t search; spa_error_entry_t *new; avl_tree_t *tree; avl_index_t where; /* * If we are trying to import a pool, ignore any errors, as we won't be * writing to the pool any time soon. */ if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT) return; mutex_enter(&spa->spa_errlist_lock); /* * If we have had a request to rotate the log, log it to the next list * instead of the current one. */ if (spa->spa_scrub_active || spa->spa_scrub_finished) tree = &spa->spa_errlist_scrub; else tree = &spa->spa_errlist_last; search.se_bookmark = *zb; if (avl_find(tree, &search, &where) != NULL) { mutex_exit(&spa->spa_errlist_lock); return; } new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP); new->se_bookmark = *zb; avl_insert(tree, new, where); mutex_exit(&spa->spa_errlist_lock); } #ifdef _KERNEL static int find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep, uint64_t *birth_txg) { objset_t *os; int error = dmu_objset_from_ds(ds, &os); if (error != 0) return (error); dnode_t *dn; blkptr_t bp; error = dnode_hold(os, zep->zb_object, FTAG, &dn); if (error != 0) return (error); rw_enter(&dn->dn_struct_rwlock, RW_READER); error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL, NULL); - if (error == 0 && BP_IS_HOLE(&bp)) error = SET_ERROR(ENOENT); *birth_txg = bp.blk_birth; rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (error); } /* * This function serves a double role. If only_count is true, it returns * (in *count) how many times an error block belonging to this filesystem is * referenced by snapshots or clones. If only_count is false, each time the * error block is referenced by a snapshot or clone, it fills the userspace * array at uaddr with the bookmarks of the error blocks. The array is filled * from the back and *count is modified to be the number of unused entries at * the beginning of the array. */ static int check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, uint64_t *count, void *uaddr, boolean_t only_count) { dsl_dataset_t *ds; dsl_pool_t *dp = spa->spa_dsl_pool; int error = dsl_dataset_hold_obj(dp, head_ds, FTAG, &ds); if (error != 0) return (error); uint64_t latest_txg; uint64_t txg_to_consider = spa->spa_syncing_txg; boolean_t check_snapshot = B_TRUE; error = find_birth_txg(ds, zep, &latest_txg); - if (error == 0) { - if (zep->zb_birth == latest_txg) { - /* Block neither free nor rewritten. */ - if (!only_count) { - zbookmark_phys_t zb; - zep_to_zb(head_ds, zep, &zb); - if (copyout(&zb, (char *)uaddr + (*count - 1) - * sizeof (zbookmark_phys_t), - sizeof (zbookmark_phys_t)) != 0) { - dsl_dataset_rele(ds, FTAG); - return (SET_ERROR(EFAULT)); - } - (*count)--; - } else { - (*count)++; + + /* + * If we cannot figure out the current birth txg of the block pointer + * error out. If the filesystem is encrypted and the key is not loaded + * or the encrypted filesystem is not mounted the error will be EACCES. + * In that case do not return an error. + */ + if (error == EACCES) { + dsl_dataset_rele(ds, FTAG); + return (0); + } + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + if (zep->zb_birth == latest_txg) { + /* Block neither free nor rewritten. */ + if (!only_count) { + zbookmark_phys_t zb; + zep_to_zb(head_ds, zep, &zb); + if (copyout(&zb, (char *)uaddr + (*count - 1) + * sizeof (zbookmark_phys_t), + sizeof (zbookmark_phys_t)) != 0) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EFAULT)); } - check_snapshot = B_FALSE; + (*count)--; } else { - ASSERT3U(zep->zb_birth, <, latest_txg); - txg_to_consider = latest_txg; + (*count)++; } + check_snapshot = B_FALSE; + } else { + ASSERT3U(zep->zb_birth, <, latest_txg); + txg_to_consider = latest_txg; } /* How many snapshots reference this block. */ uint64_t snap_count; error = zap_count(spa->spa_meta_objset, dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } if (snap_count == 0) { /* File system has no snapshot. */ dsl_dataset_rele(ds, FTAG); return (0); } uint64_t *snap_obj_array = kmem_alloc(snap_count * sizeof (uint64_t), KM_SLEEP); int aff_snap_count = 0; uint64_t snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; uint64_t snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; /* Check only snapshots created from this file system. */ while (snap_obj != 0 && zep->zb_birth < snap_obj_txg && snap_obj_txg <= txg_to_consider) { dsl_dataset_rele(ds, FTAG); error = dsl_dataset_hold_obj(dp, snap_obj, FTAG, &ds); if (error != 0) goto out; if (dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj != head_ds) break; boolean_t affected = B_TRUE; if (check_snapshot) { uint64_t blk_txg; error = find_birth_txg(ds, zep, &blk_txg); affected = (error == 0 && zep->zb_birth == blk_txg); } if (affected) { snap_obj_array[aff_snap_count] = snap_obj; aff_snap_count++; if (!only_count) { zbookmark_phys_t zb; zep_to_zb(snap_obj, zep, &zb); if (copyout(&zb, (char *)uaddr + (*count - 1) * sizeof (zbookmark_phys_t), sizeof (zbookmark_phys_t)) != 0) { dsl_dataset_rele(ds, FTAG); error = SET_ERROR(EFAULT); goto out; } (*count)--; } else { (*count)++; } /* * Only clones whose origins were affected could also * have affected snapshots. */ zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, spa->spa_meta_objset, dsl_dataset_phys(ds)->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { error = check_filesystem(spa, za.za_first_integer, zep, count, uaddr, only_count); if (error != 0) { zap_cursor_fini(&zc); goto out; } } zap_cursor_fini(&zc); } snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; } dsl_dataset_rele(ds, FTAG); out: kmem_free(snap_obj_array, sizeof (*snap_obj_array)); return (error); } static int find_top_affected_fs(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, uint64_t *top_affected_fs) { uint64_t oldest_dsobj; int error = dsl_dataset_oldest_snapshot(spa, head_ds, zep->zb_birth, &oldest_dsobj); if (error != 0) return (error); dsl_dataset_t *ds; error = dsl_dataset_hold_obj(spa->spa_dsl_pool, oldest_dsobj, FTAG, &ds); if (error != 0) return (error); *top_affected_fs = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj; dsl_dataset_rele(ds, FTAG); return (0); } static int process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, uint64_t *count, void *uaddr, boolean_t only_count) { dsl_pool_t *dp = spa->spa_dsl_pool; - dsl_pool_config_enter(dp, FTAG); uint64_t top_affected_fs; + /* + * If the zb_birth is 0 it means we failed to retrieve the birth txg + * of the block pointer. This happens when an encrypted filesystem is + * not mounted or when the key is not loaded. Do not proceed to + * check_filesystem(), instead do the accounting here. + */ + if (zep->zb_birth == 0) { + if (!only_count) { + zbookmark_phys_t zb; + zep_to_zb(head_ds, zep, &zb); + if (copyout(&zb, (char *)uaddr + (*count - 1) + * sizeof (zbookmark_phys_t), + sizeof (zbookmark_phys_t)) != 0) { + return (SET_ERROR(EFAULT)); + } + (*count)--; + } else { + (*count)++; + } + return (0); + } + + dsl_pool_config_enter(dp, FTAG); int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs); if (error == 0) error = check_filesystem(spa, top_affected_fs, zep, count, uaddr, only_count); dsl_pool_config_exit(dp, FTAG); return (error); } static uint64_t get_errlog_size(spa_t *spa, uint64_t spa_err_obj) { if (spa_err_obj == 0) return (0); uint64_t total = 0; zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { zap_cursor_t head_ds_cursor; zap_attribute_t head_ds_attr; zbookmark_err_phys_t head_ds_block; uint64_t head_ds; name_to_object(za.za_name, &head_ds); for (zap_cursor_init(&head_ds_cursor, spa->spa_meta_objset, za.za_first_integer); zap_cursor_retrieve(&head_ds_cursor, &head_ds_attr) == 0; zap_cursor_advance(&head_ds_cursor)) { name_to_errphys(head_ds_attr.za_name, &head_ds_block); (void) process_error_block(spa, head_ds, &head_ds_block, &total, NULL, B_TRUE); } zap_cursor_fini(&head_ds_cursor); } zap_cursor_fini(&zc); return (total); } static uint64_t get_errlist_size(spa_t *spa, avl_tree_t *tree) { if (avl_numnodes(tree) == 0) return (0); uint64_t total = 0; spa_error_entry_t *se; for (se = avl_first(tree); se != NULL; se = AVL_NEXT(tree, se)) { zbookmark_err_phys_t zep; zep.zb_object = se->se_bookmark.zb_object; zep.zb_level = se->se_bookmark.zb_level; zep.zb_blkid = se->se_bookmark.zb_blkid; + zep.zb_birth = 0; /* * If we cannot find out the head dataset and birth txg of * the present error block, we opt not to error out. In the * next pool sync this information will be retrieved by * sync_error_list() and written to the on-disk error log. */ uint64_t head_ds_obj; - if (get_head_and_birth_txg(spa, &zep, - se->se_bookmark.zb_objset, &head_ds_obj) == 0) + int error = get_head_and_birth_txg(spa, &zep, + se->se_bookmark.zb_objset, &head_ds_obj); + + if (!error) (void) process_error_block(spa, head_ds_obj, &zep, &total, NULL, B_TRUE); } return (total); } #endif /* * If a healed bookmark matches an entry in the error log we stash it in a tree * so that we can later remove the related log entries in sync context. */ static void spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb) { char name[NAME_MAX_LEN]; if (obj == 0) return; bookmark_to_name(healed_zb, name, sizeof (name)); mutex_enter(&spa->spa_errlog_lock); if (zap_contains(spa->spa_meta_objset, obj, name) == 0) { /* * Found an error matching healed zb, add zb to our * tree of healed errors */ avl_tree_t *tree = &spa->spa_errlist_healed; spa_error_entry_t search; spa_error_entry_t *new; avl_index_t where; search.se_bookmark = *healed_zb; mutex_enter(&spa->spa_errlist_lock); if (avl_find(tree, &search, &where) != NULL) { mutex_exit(&spa->spa_errlist_lock); mutex_exit(&spa->spa_errlog_lock); return; } new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP); new->se_bookmark = *healed_zb; avl_insert(tree, new, where); mutex_exit(&spa->spa_errlist_lock); } mutex_exit(&spa->spa_errlog_lock); } /* * If this error exists in the given tree remove it. */ static void remove_error_from_list(spa_t *spa, avl_tree_t *t, const zbookmark_phys_t *zb) { spa_error_entry_t search, *found; avl_index_t where; mutex_enter(&spa->spa_errlist_lock); search.se_bookmark = *zb; if ((found = avl_find(t, &search, &where)) != NULL) { avl_remove(t, found); kmem_free(found, sizeof (spa_error_entry_t)); } mutex_exit(&spa->spa_errlist_lock); } /* * Removes all of the recv healed errors from both on-disk error logs */ static void spa_remove_healed_errors(spa_t *spa, avl_tree_t *s, avl_tree_t *l, dmu_tx_t *tx) { char name[NAME_MAX_LEN]; spa_error_entry_t *se; void *cookie = NULL; ASSERT(MUTEX_HELD(&spa->spa_errlog_lock)); while ((se = avl_destroy_nodes(&spa->spa_errlist_healed, &cookie)) != NULL) { remove_error_from_list(spa, s, &se->se_bookmark); remove_error_from_list(spa, l, &se->se_bookmark); bookmark_to_name(&se->se_bookmark, name, sizeof (name)); kmem_free(se, sizeof (spa_error_entry_t)); (void) zap_remove(spa->spa_meta_objset, spa->spa_errlog_last, name, tx); (void) zap_remove(spa->spa_meta_objset, spa->spa_errlog_scrub, name, tx); } } /* * Stash away healed bookmarks to remove them from the on-disk error logs * later in spa_remove_healed_errors(). */ void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb) { char name[NAME_MAX_LEN]; bookmark_to_name(zb, name, sizeof (name)); spa_add_healed_error(spa, spa->spa_errlog_last, zb); spa_add_healed_error(spa, spa->spa_errlog_scrub, zb); } /* * Return the number of errors currently in the error log. This is actually the * sum of both the last log and the current log, since we don't know the union * of these logs until we reach userland. */ uint64_t spa_get_errlog_size(spa_t *spa) { uint64_t total = 0; if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { mutex_enter(&spa->spa_errlog_lock); uint64_t count; if (spa->spa_errlog_scrub != 0 && zap_count(spa->spa_meta_objset, spa->spa_errlog_scrub, &count) == 0) total += count; if (spa->spa_errlog_last != 0 && !spa->spa_scrub_finished && zap_count(spa->spa_meta_objset, spa->spa_errlog_last, &count) == 0) total += count; mutex_exit(&spa->spa_errlog_lock); mutex_enter(&spa->spa_errlist_lock); total += avl_numnodes(&spa->spa_errlist_last); total += avl_numnodes(&spa->spa_errlist_scrub); mutex_exit(&spa->spa_errlist_lock); } else { #ifdef _KERNEL mutex_enter(&spa->spa_errlog_lock); total += get_errlog_size(spa, spa->spa_errlog_last); total += get_errlog_size(spa, spa->spa_errlog_scrub); mutex_exit(&spa->spa_errlog_lock); mutex_enter(&spa->spa_errlist_lock); total += get_errlist_size(spa, &spa->spa_errlist_last); total += get_errlist_size(spa, &spa->spa_errlist_scrub); mutex_exit(&spa->spa_errlist_lock); #endif } return (total); } /* * This function sweeps through an on-disk error log and stores all bookmarks * as error bookmarks in a new ZAP object. At the end we discard the old one, * and spa_update_errlog() will set the spa's on-disk error log to new ZAP * object. */ static void sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, dmu_tx_t *tx) { zap_cursor_t zc; zap_attribute_t za; zbookmark_phys_t zb; uint64_t count; *newobj = zap_create(spa->spa_meta_objset, DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx); /* * If we cannnot perform the upgrade we should clear the old on-disk * error logs. */ if (zap_count(spa->spa_meta_objset, spa_err_obj, &count) != 0) { VERIFY0(dmu_object_free(spa->spa_meta_objset, spa_err_obj, tx)); return; } for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { if (spa_upgrade_errlog_limit != 0 && zc.zc_cd == spa_upgrade_errlog_limit) break; name_to_bookmark(za.za_name, &zb); zbookmark_err_phys_t zep; zep.zb_object = zb.zb_object; zep.zb_level = zb.zb_level; zep.zb_blkid = zb.zb_blkid; + zep.zb_birth = 0; /* * We cannot use get_head_and_birth_txg() because it will * acquire the pool config lock, which we already have. In case * of an error we simply continue. */ uint64_t head_dataset_obj; dsl_pool_t *dp = spa->spa_dsl_pool; dsl_dataset_t *ds; objset_t *os; int error = dsl_dataset_hold_obj(dp, zb.zb_objset, FTAG, &ds); if (error != 0) continue; head_dataset_obj = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj; /* * The objset and the dnode are required for getting the block * pointer, which is used to determine if BP_IS_HOLE(). If * getting the objset or the dnode fails, do not create a * zap entry (presuming we know the dataset) as this may create * spurious errors that we cannot ever resolve. If an error is * truly persistent, it should re-appear after a scan. */ if (dmu_objset_from_ds(ds, &os) != 0) { dsl_dataset_rele(ds, FTAG); continue; } dnode_t *dn; blkptr_t bp; if (dnode_hold(os, zep.zb_object, FTAG, &dn) != 0) { dsl_dataset_rele(ds, FTAG); continue; } rw_enter(&dn->dn_struct_rwlock, RW_READER); error = dbuf_dnode_findbp(dn, zep.zb_level, zep.zb_blkid, &bp, NULL, NULL); + if (error == EACCES) + error = 0; + else if (!error) + zep.zb_birth = bp.blk_birth; - zep.zb_birth = bp.blk_birth; rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); dsl_dataset_rele(ds, FTAG); if (error != 0 || BP_IS_HOLE(&bp)) continue; uint64_t err_obj; error = zap_lookup_int_key(spa->spa_meta_objset, *newobj, head_dataset_obj, &err_obj); if (error == ENOENT) { err_obj = zap_create(spa->spa_meta_objset, DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx); (void) zap_update_int_key(spa->spa_meta_objset, *newobj, head_dataset_obj, err_obj, tx); } char buf[64]; errphys_to_name(&zep, buf, sizeof (buf)); const char *name = ""; (void) zap_update(spa->spa_meta_objset, err_obj, buf, 1, strlen(name) + 1, name, tx); } zap_cursor_fini(&zc); VERIFY0(dmu_object_free(spa->spa_meta_objset, spa_err_obj, tx)); } void spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx) { uint64_t newobj = 0; mutex_enter(&spa->spa_errlog_lock); if (spa->spa_errlog_last != 0) { sync_upgrade_errlog(spa, spa->spa_errlog_last, &newobj, tx); spa->spa_errlog_last = newobj; } if (spa->spa_errlog_scrub != 0) { sync_upgrade_errlog(spa, spa->spa_errlog_scrub, &newobj, tx); spa->spa_errlog_scrub = newobj; } mutex_exit(&spa->spa_errlog_lock); } #ifdef _KERNEL /* * If an error block is shared by two datasets it will be counted twice. For * detailed message see spa_get_errlog_size() above. */ static int process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) { zap_cursor_t zc; zap_attribute_t za; if (obj == 0) return (0); if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { for (zap_cursor_init(&zc, spa->spa_meta_objset, obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { if (*count == 0) { zap_cursor_fini(&zc); return (SET_ERROR(ENOMEM)); } zbookmark_phys_t zb; name_to_bookmark(za.za_name, &zb); if (copyout(&zb, (char *)uaddr + (*count - 1) * sizeof (zbookmark_phys_t), sizeof (zbookmark_phys_t)) != 0) { zap_cursor_fini(&zc); return (SET_ERROR(EFAULT)); } *count -= 1; } zap_cursor_fini(&zc); return (0); } for (zap_cursor_init(&zc, spa->spa_meta_objset, obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { zap_cursor_t head_ds_cursor; zap_attribute_t head_ds_attr; uint64_t head_ds_err_obj = za.za_first_integer; uint64_t head_ds; name_to_object(za.za_name, &head_ds); for (zap_cursor_init(&head_ds_cursor, spa->spa_meta_objset, head_ds_err_obj); zap_cursor_retrieve(&head_ds_cursor, &head_ds_attr) == 0; zap_cursor_advance(&head_ds_cursor)) { zbookmark_err_phys_t head_ds_block; name_to_errphys(head_ds_attr.za_name, &head_ds_block); int error = process_error_block(spa, head_ds, &head_ds_block, count, uaddr, B_FALSE); if (error != 0) { zap_cursor_fini(&head_ds_cursor); zap_cursor_fini(&zc); return (error); } } zap_cursor_fini(&head_ds_cursor); } zap_cursor_fini(&zc); return (0); } static int process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count) { spa_error_entry_t *se; if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) { if (*count == 0) return (SET_ERROR(ENOMEM)); if (copyout(&se->se_bookmark, (char *)uaddr + (*count - 1) * sizeof (zbookmark_phys_t), sizeof (zbookmark_phys_t)) != 0) return (SET_ERROR(EFAULT)); *count -= 1; } return (0); } for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) { zbookmark_err_phys_t zep; zep.zb_object = se->se_bookmark.zb_object; zep.zb_level = se->se_bookmark.zb_level; zep.zb_blkid = se->se_bookmark.zb_blkid; + zep.zb_birth = 0; uint64_t head_ds_obj; int error = get_head_and_birth_txg(spa, &zep, se->se_bookmark.zb_objset, &head_ds_obj); - if (error != 0) - return (error); - error = process_error_block(spa, head_ds_obj, &zep, count, - uaddr, B_FALSE); - if (error != 0) + if (!error) + error = process_error_block(spa, head_ds_obj, &zep, + count, uaddr, B_FALSE); + if (error) return (error); } return (0); } #endif /* * Copy all known errors to userland as an array of bookmarks. This is * actually a union of the on-disk last log and current log, as well as any * pending error requests. * * Because the act of reading the on-disk log could cause errors to be * generated, we have two separate locks: one for the error log and one for the * in-core error lists. We only need the error list lock to log and error, so * we grab the error log lock while we read the on-disk logs, and only pick up * the error list lock when we are finished. */ int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count) { int ret = 0; #ifdef _KERNEL mutex_enter(&spa->spa_errlog_lock); ret = process_error_log(spa, spa->spa_errlog_scrub, uaddr, count); if (!ret && !spa->spa_scrub_finished) ret = process_error_log(spa, spa->spa_errlog_last, uaddr, count); mutex_enter(&spa->spa_errlist_lock); if (!ret) ret = process_error_list(spa, &spa->spa_errlist_scrub, uaddr, count); if (!ret) ret = process_error_list(spa, &spa->spa_errlist_last, uaddr, count); mutex_exit(&spa->spa_errlist_lock); mutex_exit(&spa->spa_errlog_lock); #else (void) spa, (void) uaddr, (void) count; #endif return (ret); } /* * Called when a scrub completes. This simply set a bit which tells which AVL * tree to add new errors. spa_errlog_sync() is responsible for actually * syncing the changes to the underlying objects. */ void spa_errlog_rotate(spa_t *spa) { mutex_enter(&spa->spa_errlist_lock); spa->spa_scrub_finished = B_TRUE; mutex_exit(&spa->spa_errlist_lock); } /* * Discard any pending errors from the spa_t. Called when unloading a faulted * pool, as the errors encountered during the open cannot be synced to disk. */ void spa_errlog_drain(spa_t *spa) { spa_error_entry_t *se; void *cookie; mutex_enter(&spa->spa_errlist_lock); cookie = NULL; while ((se = avl_destroy_nodes(&spa->spa_errlist_last, &cookie)) != NULL) kmem_free(se, sizeof (spa_error_entry_t)); cookie = NULL; while ((se = avl_destroy_nodes(&spa->spa_errlist_scrub, &cookie)) != NULL) kmem_free(se, sizeof (spa_error_entry_t)); mutex_exit(&spa->spa_errlist_lock); } /* * Process a list of errors into the current on-disk log. */ void sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx) { spa_error_entry_t *se; char buf[NAME_MAX_LEN]; void *cookie; if (avl_numnodes(t) == 0) return; /* create log if necessary */ if (*obj == 0) *obj = zap_create(spa->spa_meta_objset, DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx); /* add errors to the current log */ if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) { bookmark_to_name(&se->se_bookmark, buf, sizeof (buf)); const char *name = se->se_name ? se->se_name : ""; (void) zap_update(spa->spa_meta_objset, *obj, buf, 1, strlen(name) + 1, name, tx); } } else { for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) { zbookmark_err_phys_t zep; zep.zb_object = se->se_bookmark.zb_object; zep.zb_level = se->se_bookmark.zb_level; zep.zb_blkid = se->se_bookmark.zb_blkid; + zep.zb_birth = 0; /* * If we cannot find out the head dataset and birth txg * of the present error block, we simply continue. * Reinserting that error block to the error lists, * even if we are not syncing the final txg, results * in duplicate posting of errors. */ uint64_t head_dataset_obj; int error = get_head_and_birth_txg(spa, &zep, se->se_bookmark.zb_objset, &head_dataset_obj); - if (error != 0) + if (error) continue; uint64_t err_obj; error = zap_lookup_int_key(spa->spa_meta_objset, *obj, head_dataset_obj, &err_obj); if (error == ENOENT) { err_obj = zap_create(spa->spa_meta_objset, DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx); (void) zap_update_int_key(spa->spa_meta_objset, *obj, head_dataset_obj, err_obj, tx); } errphys_to_name(&zep, buf, sizeof (buf)); const char *name = se->se_name ? se->se_name : ""; (void) zap_update(spa->spa_meta_objset, err_obj, buf, 1, strlen(name) + 1, name, tx); } } /* purge the error list */ cookie = NULL; while ((se = avl_destroy_nodes(t, &cookie)) != NULL) kmem_free(se, sizeof (spa_error_entry_t)); } static void delete_errlog(spa_t *spa, uint64_t spa_err_obj, dmu_tx_t *tx) { if (spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { VERIFY0(dmu_object_free(spa->spa_meta_objset, za.za_first_integer, tx)); } zap_cursor_fini(&zc); } VERIFY0(dmu_object_free(spa->spa_meta_objset, spa_err_obj, tx)); } /* * Sync the error log out to disk. This is a little tricky because the act of * writing the error log requires the spa_errlist_lock. So, we need to lock the * error lists, take a copy of the lists, and then reinitialize them. Then, we * drop the error list lock and take the error log lock, at which point we * do the errlog processing. Then, if we encounter an I/O error during this * process, we can successfully add the error to the list. Note that this will * result in the perpetual recycling of errors, but it is an unlikely situation * and not a performance critical operation. */ void spa_errlog_sync(spa_t *spa, uint64_t txg) { dmu_tx_t *tx; avl_tree_t scrub, last; int scrub_finished; mutex_enter(&spa->spa_errlist_lock); /* * Bail out early under normal circumstances. */ if (avl_numnodes(&spa->spa_errlist_scrub) == 0 && avl_numnodes(&spa->spa_errlist_last) == 0 && avl_numnodes(&spa->spa_errlist_healed) == 0 && !spa->spa_scrub_finished) { mutex_exit(&spa->spa_errlist_lock); return; } spa_get_errlists(spa, &last, &scrub); scrub_finished = spa->spa_scrub_finished; spa->spa_scrub_finished = B_FALSE; mutex_exit(&spa->spa_errlist_lock); mutex_enter(&spa->spa_errlog_lock); tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); /* * Remove healed errors from errors. */ spa_remove_healed_errors(spa, &last, &scrub, tx); /* * Sync out the current list of errors. */ sync_error_list(spa, &last, &spa->spa_errlog_last, tx); /* * Rotate the log if necessary. */ if (scrub_finished) { if (spa->spa_errlog_last != 0) delete_errlog(spa, spa->spa_errlog_last, tx); spa->spa_errlog_last = spa->spa_errlog_scrub; spa->spa_errlog_scrub = 0; sync_error_list(spa, &scrub, &spa->spa_errlog_last, tx); } /* * Sync out any pending scrub errors. */ sync_error_list(spa, &scrub, &spa->spa_errlog_scrub, tx); /* * Update the MOS to reflect the new values. */ (void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, sizeof (uint64_t), 1, &spa->spa_errlog_last, tx); (void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, sizeof (uint64_t), 1, &spa->spa_errlog_scrub, tx); dmu_tx_commit(tx); mutex_exit(&spa->spa_errlog_lock); } static void delete_dataset_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t ds, dmu_tx_t *tx) { if (spa_err_obj == 0) return; zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t head_ds; name_to_object(za.za_name, &head_ds); if (head_ds == ds) { (void) zap_remove(spa->spa_meta_objset, spa_err_obj, za.za_name, tx); VERIFY0(dmu_object_free(spa->spa_meta_objset, za.za_first_integer, tx)); break; } } zap_cursor_fini(&zc); } void spa_delete_dataset_errlog(spa_t *spa, uint64_t ds, dmu_tx_t *tx) { mutex_enter(&spa->spa_errlog_lock); delete_dataset_errlog(spa, spa->spa_errlog_scrub, ds, tx); delete_dataset_errlog(spa, spa->spa_errlog_last, ds, tx); mutex_exit(&spa->spa_errlog_lock); } static int find_txg_ancestor_snapshot(spa_t *spa, uint64_t new_head, uint64_t old_head, uint64_t *txg) { dsl_dataset_t *ds; dsl_pool_t *dp = spa->spa_dsl_pool; int error = dsl_dataset_hold_obj(dp, old_head, FTAG, &ds); if (error != 0) return (error); uint64_t prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; uint64_t prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; while (prev_obj != 0) { dsl_dataset_rele(ds, FTAG); if ((error = dsl_dataset_hold_obj(dp, prev_obj, FTAG, &ds)) == 0 && dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj == new_head) break; if (error != 0) return (error); prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; } dsl_dataset_rele(ds, FTAG); ASSERT(prev_obj != 0); *txg = prev_obj_txg; return (0); } static void swap_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t new_head, uint64_t old_head, dmu_tx_t *tx) { if (spa_err_obj == 0) return; uint64_t old_head_errlog; int error = zap_lookup_int_key(spa->spa_meta_objset, spa_err_obj, old_head, &old_head_errlog); /* If no error log, then there is nothing to do. */ if (error != 0) return; uint64_t txg; error = find_txg_ancestor_snapshot(spa, new_head, old_head, &txg); if (error != 0) return; /* * Create an error log if the file system being promoted does not * already have one. */ uint64_t new_head_errlog; error = zap_lookup_int_key(spa->spa_meta_objset, spa_err_obj, new_head, &new_head_errlog); if (error != 0) { new_head_errlog = zap_create(spa->spa_meta_objset, DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx); (void) zap_update_int_key(spa->spa_meta_objset, spa_err_obj, new_head, new_head_errlog, tx); } zap_cursor_t zc; zap_attribute_t za; zbookmark_err_phys_t err_block; for (zap_cursor_init(&zc, spa->spa_meta_objset, old_head_errlog); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { const char *name = ""; name_to_errphys(za.za_name, &err_block); if (err_block.zb_birth < txg) { (void) zap_update(spa->spa_meta_objset, new_head_errlog, za.za_name, 1, strlen(name) + 1, name, tx); (void) zap_remove(spa->spa_meta_objset, old_head_errlog, za.za_name, tx); } } zap_cursor_fini(&zc); } void spa_swap_errlog(spa_t *spa, uint64_t new_head_ds, uint64_t old_head_ds, dmu_tx_t *tx) { mutex_enter(&spa->spa_errlog_lock); swap_errlog(spa, spa->spa_errlog_scrub, new_head_ds, old_head_ds, tx); swap_errlog(spa, spa->spa_errlog_last, new_head_ds, old_head_ds, tx); mutex_exit(&spa->spa_errlog_lock); } #if defined(_KERNEL) /* error handling */ EXPORT_SYMBOL(spa_log_error); EXPORT_SYMBOL(spa_get_errlog_size); EXPORT_SYMBOL(spa_get_errlog); EXPORT_SYMBOL(spa_errlog_rotate); EXPORT_SYMBOL(spa_errlog_drain); EXPORT_SYMBOL(spa_errlog_sync); EXPORT_SYMBOL(spa_get_errlists); EXPORT_SYMBOL(spa_delete_dataset_errlog); EXPORT_SYMBOL(spa_swap_errlog); EXPORT_SYMBOL(sync_error_list); EXPORT_SYMBOL(spa_upgrade_errlog); #endif /* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_spa, spa_, upgrade_errlog_limit, INT, ZMOD_RW, "Limit the number of errors which will be upgraded to the new " "on-disk error log when enabling head_errlog"); /* END CSTYLED */ diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index 414316eb6559..b9a9e0efcc82 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -1,971 +1,971 @@ # # This file and its contents are supplied under the terms of the # Common Development and Distribution License ("CDDL"), version 1.0. # You may only use this file in accordance with the terms of version # 1.0 of the CDDL. # # A full copy of the text of the CDDL should have accompanied this # source. A copy of the CDDL is also available via the Internet at # http://www.illumos.org/license/CDDL. # # This run file contains all of the common functional tests. When # adding a new test consider also adding it to the sanity.run file # if the new test runs to completion in only a few seconds. # # Approximate run time: 4-5 hours # [DEFAULT] pre = setup quiet = False pre_user = root user = root timeout = 600 post_user = root post = cleanup failsafe_user = root failsafe = callbacks/zfs_failsafe outputdir = /var/tmp/test_results tags = ['functional'] [tests/functional/acl/off] tests = ['dosmode', 'posixmode'] tags = ['functional', 'acl'] [tests/functional/alloc_class] tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos', 'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos', 'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos', 'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos', 'alloc_class_013_pos'] tags = ['functional', 'alloc_class'] [tests/functional/append] tests = ['file_append', 'threadsappend_001_pos'] tags = ['functional', 'append'] [tests/functional/arc] tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos', 'arcstats_runtime_tuning'] tags = ['functional', 'arc'] [tests/functional/atime] tests = ['atime_001_pos', 'atime_002_neg', 'root_atime_off', 'root_atime_on'] tags = ['functional', 'atime'] [tests/functional/bootfs] tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos', 'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_006_pos', 'bootfs_007_pos', 'bootfs_008_pos'] tags = ['functional', 'bootfs'] [tests/functional/btree] tests = ['btree_positive', 'btree_negative'] tags = ['functional', 'btree'] pre = post = [tests/functional/cache] tests = ['cache_001_pos', 'cache_002_pos', 'cache_003_pos', 'cache_004_neg', 'cache_005_neg', 'cache_006_pos', 'cache_007_neg', 'cache_008_neg', 'cache_009_pos', 'cache_010_pos', 'cache_011_pos', 'cache_012_pos'] tags = ['functional', 'cache'] [tests/functional/cachefile] tests = ['cachefile_001_pos', 'cachefile_002_pos', 'cachefile_003_pos', 'cachefile_004_pos'] tags = ['functional', 'cachefile'] [tests/functional/casenorm] tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure', 'sensitive_none_lookup', 'sensitive_none_delete', 'sensitive_formd_lookup', 'sensitive_formd_delete', 'insensitive_none_lookup', 'insensitive_none_delete', 'insensitive_formd_lookup', 'insensitive_formd_delete', 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete', 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete'] tags = ['functional', 'casenorm'] [tests/functional/channel_program/lua_core] tests = ['tst.args_to_lua', 'tst.divide_by_zero', 'tst.exists', 'tst.integer_illegal', 'tst.integer_overflow', 'tst.language_functions_neg', 'tst.language_functions_pos', 'tst.large_prog', 'tst.libraries', 'tst.memory_limit', 'tst.nested_neg', 'tst.nested_pos', 'tst.nvlist_to_lua', 'tst.recursive_neg', 'tst.recursive_pos', 'tst.return_large', 'tst.return_nvlist_neg', 'tst.return_nvlist_pos', 'tst.return_recursive_table', 'tst.stack_gsub', 'tst.timeout'] tags = ['functional', 'channel_program', 'lua_core'] [tests/functional/channel_program/synctask_core] tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit', 'tst.get_index_props', 'tst.get_mountpoint', 'tst.get_neg', 'tst.get_number_props', 'tst.get_string_props', 'tst.get_type', 'tst.get_userquota', 'tst.get_written', 'tst.inherit', 'tst.list_bookmarks', 'tst.list_children', 'tst.list_clones', 'tst.list_holds', 'tst.list_snapshots', 'tst.list_system_props', 'tst.list_user_props', 'tst.parse_args_neg','tst.promote_conflict', 'tst.promote_multiple', 'tst.promote_simple', 'tst.rollback_mult', 'tst.rollback_one', 'tst.set_props', 'tst.snapshot_destroy', 'tst.snapshot_neg', 'tst.snapshot_recursive', 'tst.snapshot_simple', 'tst.bookmark.create', 'tst.bookmark.copy', 'tst.terminate_by_signal' ] tags = ['functional', 'channel_program', 'synctask_core'] [tests/functional/checksum] tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'run_blake3_test', 'filetest_001_pos', 'filetest_002_pos'] tags = ['functional', 'checksum'] [tests/functional/clean_mirror] tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos', 'clean_mirror_003_pos', 'clean_mirror_004_pos'] tags = ['functional', 'clean_mirror'] [tests/functional/cli_root/zdb] tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos', 'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos', 'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress', 'zdb_display_block', 'zdb_label_checksum', 'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_objset_id', 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2'] pre = post = tags = ['functional', 'cli_root', 'zdb'] [tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos'] tags = ['functional', 'cli_root', 'zfs'] [tests/functional/cli_root/zfs_bookmark] tests = ['zfs_bookmark_cliargs'] tags = ['functional', 'cli_root', 'zfs_bookmark'] [tests/functional/cli_root/zfs_change-key] tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', 'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones'] tags = ['functional', 'cli_root', 'zfs_change-key'] [tests/functional/cli_root/zfs_clone] tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', 'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', 'zfs_clone_010_pos', 'zfs_clone_encrypted', 'zfs_clone_deeply_nested'] tags = ['functional', 'cli_root', 'zfs_clone'] [tests/functional/cli_root/zfs_copies] tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos', 'zfs_copies_004_neg', 'zfs_copies_005_neg', 'zfs_copies_006_pos'] tags = ['functional', 'cli_root', 'zfs_copies'] [tests/functional/cli_root/zfs_create] tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', 'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted', 'zfs_create_crypt_combos', 'zfs_create_dryrun', 'zfs_create_nomount', 'zfs_create_verbose'] tags = ['functional', 'cli_root', 'zfs_create'] [tests/functional/cli_root/zfs_destroy] tests = ['zfs_clone_livelist_condense_and_disable', 'zfs_clone_livelist_condense_races', 'zfs_clone_livelist_dedup', 'zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', 'zfs_destroy_004_pos', 'zfs_destroy_005_neg', 'zfs_destroy_006_neg', 'zfs_destroy_007_neg', 'zfs_destroy_008_pos', 'zfs_destroy_009_pos', 'zfs_destroy_010_pos', 'zfs_destroy_011_pos', 'zfs_destroy_012_pos', 'zfs_destroy_013_neg', 'zfs_destroy_014_pos', 'zfs_destroy_015_pos', 'zfs_destroy_016_pos', 'zfs_destroy_clone_livelist', 'zfs_destroy_dev_removal', 'zfs_destroy_dev_removal_condense'] tags = ['functional', 'cli_root', 'zfs_destroy'] [tests/functional/cli_root/zfs_diff] tests = ['zfs_diff_changes', 'zfs_diff_cliargs', 'zfs_diff_timestamp', 'zfs_diff_types', 'zfs_diff_encrypted', 'zfs_diff_mangle'] tags = ['functional', 'cli_root', 'zfs_diff'] [tests/functional/cli_root/zfs_get] tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', 'zfs_get_004_pos', 'zfs_get_005_neg', 'zfs_get_006_neg', 'zfs_get_007_neg', 'zfs_get_008_pos', 'zfs_get_009_pos', 'zfs_get_010_neg'] tags = ['functional', 'cli_root', 'zfs_get'] [tests/functional/cli_root/zfs_ids_to_path] tests = ['zfs_ids_to_path_001_pos'] tags = ['functional', 'cli_root', 'zfs_ids_to_path'] [tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos', 'zfs_inherit_mountpoint'] tags = ['functional', 'cli_root', 'zfs_inherit'] [tests/functional/cli_root/zfs_load-key] tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file', 'zfs_load-key_https', 'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive'] tags = ['functional', 'cli_root', 'zfs_load-key'] [tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_pos', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', 'zfs_mount_test_race'] tags = ['functional', 'cli_root', 'zfs_mount'] [tests/functional/cli_root/zfs_program] tests = ['zfs_program_json'] tags = ['functional', 'cli_root', 'zfs_program'] [tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot'] tags = ['functional', 'cli_root', 'zfs_promote'] [tests/functional/cli_root/zfs_property] tests = ['zfs_written_property_001_pos'] tags = ['functional', 'cli_root', 'zfs_property'] [tests/functional/cli_root/zfs_receive] tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_004_neg', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', 'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos', 'zfs_receive_016_pos', 'receive-o-x_props_override', 'receive-o-x_props_aliases', 'zfs_receive_from_encrypted', 'zfs_receive_to_encrypted', 'zfs_receive_raw', 'zfs_receive_raw_incremental', 'zfs_receive_-e', 'zfs_receive_raw_-d', 'zfs_receive_from_zstd', 'zfs_receive_new_props', 'zfs_receive_-wR-encrypted-mix', 'zfs_receive_corrective', 'zfs_receive_compressed_corrective'] tags = ['functional', 'cli_root', 'zfs_receive'] [tests/functional/cli_root/zfs_rename] tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos', 'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', 'zfs_rename_013_pos', 'zfs_rename_014_neg', 'zfs_rename_encrypted_child', 'zfs_rename_to_encrypted', 'zfs_rename_mountpoint', 'zfs_rename_nounmount'] tags = ['functional', 'cli_root', 'zfs_rename'] [tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] tags = ['functional', 'cli_root', 'zfs_reservation'] [tests/functional/cli_root/zfs_rollback] tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', 'zfs_rollback_003_neg', 'zfs_rollback_004_neg'] tags = ['functional', 'cli_root', 'zfs_rollback'] [tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw', 'zfs_send_sparse', 'zfs_send-b', 'zfs_send_skip_missing'] tags = ['functional', 'cli_root', 'zfs_send'] [tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', 'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos', 'checksum_001_pos', 'compression_001_pos', 'mountpoint_001_pos', 'mountpoint_002_pos', 'reservation_001_neg', 'user_property_002_pos', 'share_mount_001_neg', 'snapdir_001_pos', 'onoffs_001_pos', 'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos', 'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg', 'zfs_set_002_neg', 'zfs_set_003_neg', 'property_alias_001_pos', 'mountpoint_003_pos', 'ro_props_001_pos', 'zfs_set_keylocation', 'zfs_set_feature_activation'] tags = ['functional', 'cli_root', 'zfs_set'] [tests/functional/cli_root/zfs_share] tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', 'zfs_share_004_pos', 'zfs_share_006_pos', 'zfs_share_008_neg', 'zfs_share_010_neg', 'zfs_share_011_pos', 'zfs_share_concurrent_shares'] tags = ['functional', 'cli_root', 'zfs_share'] [tests/functional/cli_root/zfs_snapshot] tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', 'zfs_snapshot_003_neg', 'zfs_snapshot_004_neg', 'zfs_snapshot_005_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg', 'zfs_snapshot_009_pos'] tags = ['functional', 'cli_root', 'zfs_snapshot'] [tests/functional/cli_root/zfs_unload-key] tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive'] tags = ['functional', 'cli_root', 'zfs_unload-key'] [tests/functional/cli_root/zfs_unmount] tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos', 'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos', 'zfs_unmount_007_neg', 'zfs_unmount_008_neg', 'zfs_unmount_009_pos', 'zfs_unmount_all_001_pos', 'zfs_unmount_nested', 'zfs_unmount_unload_keys'] tags = ['functional', 'cli_root', 'zfs_unmount'] [tests/functional/cli_root/zfs_unshare] tests = ['zfs_unshare_001_pos', 'zfs_unshare_002_pos', 'zfs_unshare_003_pos', 'zfs_unshare_004_neg', 'zfs_unshare_005_neg', 'zfs_unshare_006_pos', 'zfs_unshare_007_pos', 'zfs_unshare_008_pos'] tags = ['functional', 'cli_root', 'zfs_unshare'] [tests/functional/cli_root/zfs_upgrade] tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos', 'zfs_upgrade_004_pos', 'zfs_upgrade_005_pos', 'zfs_upgrade_006_neg', 'zfs_upgrade_007_neg'] tags = ['functional', 'cli_root', 'zfs_upgrade'] [tests/functional/cli_root/zfs_wait] tests = ['zfs_wait_deleteq', 'zfs_wait_getsubopt'] tags = ['functional', 'cli_root', 'zfs_wait'] [tests/functional/cli_root/zhack] tests = ['zhack_label_checksum'] pre = post = tags = ['functional', 'cli_root', 'zhack'] [tests/functional/cli_root/zpool] tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors'] tags = ['functional', 'cli_root', 'zpool'] [tests/functional/cli_root/zpool_add] tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos', 'zpool_add_004_pos', 'zpool_add_006_pos', 'zpool_add_007_neg', 'zpool_add_008_neg', 'zpool_add_009_neg', 'zpool_add_010_pos', 'add-o_ashift', 'add_prop_ashift', 'zpool_add_dryrun_output'] tags = ['functional', 'cli_root', 'zpool_add'] [tests/functional/cli_root/zpool_attach] tests = ['zpool_attach_001_neg', 'attach-o_ashift'] tags = ['functional', 'cli_root', 'zpool_attach'] [tests/functional/cli_root/zpool_clear] tests = ['zpool_clear_001_pos', 'zpool_clear_002_neg', 'zpool_clear_003_neg', 'zpool_clear_readonly'] tags = ['functional', 'cli_root', 'zpool_clear'] [tests/functional/cli_root/zpool_create] tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_003_pos', 'zpool_create_004_pos', 'zpool_create_005_pos', 'zpool_create_006_pos', 'zpool_create_007_neg', 'zpool_create_008_pos', 'zpool_create_009_neg', 'zpool_create_010_neg', 'zpool_create_011_neg', 'zpool_create_012_neg', 'zpool_create_014_neg', 'zpool_create_015_neg', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_024_pos', 'zpool_create_encrypted', 'zpool_create_crypt_combos', 'zpool_create_draid_001_pos', 'zpool_create_draid_002_pos', 'zpool_create_draid_003_pos', 'zpool_create_draid_004_pos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', 'zpool_create_features_005_pos', 'zpool_create_features_006_pos', 'zpool_create_features_007_pos', 'zpool_create_features_008_pos', 'zpool_create_features_009_pos', 'create-o_ashift', 'zpool_create_tempname', 'zpool_create_dryrun_output'] tags = ['functional', 'cli_root', 'zpool_create'] [tests/functional/cli_root/zpool_destroy] tests = ['zpool_destroy_001_pos', 'zpool_destroy_002_pos', 'zpool_destroy_003_neg'] pre = post = tags = ['functional', 'cli_root', 'zpool_destroy'] [tests/functional/cli_root/zpool_detach] tests = ['zpool_detach_001_neg'] tags = ['functional', 'cli_root', 'zpool_detach'] [tests/functional/cli_root/zpool_events] tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow', 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates', 'zpool_events_clear_retained'] tags = ['functional', 'cli_root', 'zpool_events'] [tests/functional/cli_root/zpool_export] tests = ['zpool_export_001_pos', 'zpool_export_002_pos', 'zpool_export_003_neg', 'zpool_export_004_pos'] tags = ['functional', 'cli_root', 'zpool_export'] [tests/functional/cli_root/zpool_get] tests = ['zpool_get_001_pos', 'zpool_get_002_pos', 'zpool_get_003_pos', 'zpool_get_004_neg', 'zpool_get_005_pos'] tags = ['functional', 'cli_root', 'zpool_get'] [tests/functional/cli_root/zpool_history] tests = ['zpool_history_001_neg', 'zpool_history_002_pos'] tags = ['functional', 'cli_root', 'zpool_history'] [tests/functional/cli_root/zpool_import] tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'zpool_import_003_pos', 'zpool_import_004_pos', 'zpool_import_005_pos', 'zpool_import_006_pos', 'zpool_import_007_pos', 'zpool_import_008_pos', 'zpool_import_009_neg', 'zpool_import_010_pos', 'zpool_import_011_neg', 'zpool_import_012_pos', 'zpool_import_013_neg', 'zpool_import_014_pos', 'zpool_import_015_pos', 'zpool_import_016_pos', 'zpool_import_017_pos', 'zpool_import_features_001_pos', 'zpool_import_features_002_neg', 'zpool_import_features_003_pos', 'zpool_import_missing_001_pos', 'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos', 'zpool_import_rename_001_pos', 'zpool_import_all_001_pos', 'zpool_import_encrypted', 'zpool_import_encrypted_load', 'zpool_import_errata3', 'zpool_import_errata4', 'import_cachefile_device_added', 'import_cachefile_device_removed', 'import_cachefile_device_replaced', 'import_cachefile_mirror_attached', 'import_cachefile_mirror_detached', 'import_cachefile_paths_changed', 'import_cachefile_shared_device', 'import_devices_missing', 'import_paths_changed', 'import_rewind_config_changed', 'import_rewind_device_replaced'] tags = ['functional', 'cli_root', 'zpool_import'] timeout = 1200 [tests/functional/cli_root/zpool_labelclear] tests = ['zpool_labelclear_active', 'zpool_labelclear_exported', 'zpool_labelclear_removed', 'zpool_labelclear_valid'] pre = post = tags = ['functional', 'cli_root', 'zpool_labelclear'] [tests/functional/cli_root/zpool_initialize] tests = ['zpool_initialize_attach_detach_add_remove', 'zpool_initialize_fault_export_import_online', 'zpool_initialize_import_export', 'zpool_initialize_offline_export_import_online', 'zpool_initialize_online_offline', 'zpool_initialize_split', 'zpool_initialize_start_and_cancel_neg', 'zpool_initialize_start_and_cancel_pos', 'zpool_initialize_suspend_resume', 'zpool_initialize_unsupported_vdevs', 'zpool_initialize_verify_checksums', 'zpool_initialize_verify_initialized'] pre = tags = ['functional', 'cli_root', 'zpool_initialize'] [tests/functional/cli_root/zpool_offline] tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg', 'zpool_offline_003_pos'] tags = ['functional', 'cli_root', 'zpool_offline'] [tests/functional/cli_root/zpool_online] tests = ['zpool_online_001_pos', 'zpool_online_002_neg'] tags = ['functional', 'cli_root', 'zpool_online'] [tests/functional/cli_root/zpool_remove] tests = ['zpool_remove_001_neg', 'zpool_remove_002_pos', 'zpool_remove_003_pos'] tags = ['functional', 'cli_root', 'zpool_remove'] [tests/functional/cli_root/zpool_replace] tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift'] tags = ['functional', 'cli_root', 'zpool_replace'] [tests/functional/cli_root/zpool_resilver] tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart'] tags = ['functional', 'cli_root', 'zpool_resilver'] [tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_encrypted_unloaded', 'zpool_scrub_print_repairing', 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies'] tags = ['functional', 'cli_root', 'zpool_scrub'] [tests/functional/cli_root/zpool_set] tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg', 'zpool_set_ashift', 'zpool_set_features'] tags = ['functional', 'cli_root', 'zpool_set'] [tests/functional/cli_root/zpool_split] tests = ['zpool_split_cliargs', 'zpool_split_devices', 'zpool_split_encryption', 'zpool_split_props', 'zpool_split_vdevs', 'zpool_split_resilver', 'zpool_split_indirect', 'zpool_split_dryrun_output'] tags = ['functional', 'cli_root', 'zpool_split'] [tests/functional/cli_root/zpool_status] tests = ['zpool_status_001_pos', 'zpool_status_002_pos', 'zpool_status_003_pos', 'zpool_status_004_pos', - 'zpool_status_features_001_pos'] + 'zpool_status_005_pos', 'zpool_status_features_001_pos'] tags = ['functional', 'cli_root', 'zpool_status'] [tests/functional/cli_root/zpool_sync] tests = ['zpool_sync_001_pos', 'zpool_sync_002_neg'] tags = ['functional', 'cli_root', 'zpool_sync'] [tests/functional/cli_root/zpool_trim] tests = ['zpool_trim_attach_detach_add_remove', 'zpool_trim_fault_export_import_online', 'zpool_trim_import_export', 'zpool_trim_multiple', 'zpool_trim_neg', 'zpool_trim_offline_export_import_online', 'zpool_trim_online_offline', 'zpool_trim_partial', 'zpool_trim_rate', 'zpool_trim_rate_neg', 'zpool_trim_secure', 'zpool_trim_split', 'zpool_trim_start_and_cancel_neg', 'zpool_trim_start_and_cancel_pos', 'zpool_trim_suspend_resume', 'zpool_trim_unsupported_vdevs', 'zpool_trim_verify_checksums', 'zpool_trim_verify_trimmed'] tags = ['functional', 'zpool_trim'] [tests/functional/cli_root/zpool_upgrade] tests = ['zpool_upgrade_001_pos', 'zpool_upgrade_002_pos', 'zpool_upgrade_003_pos', 'zpool_upgrade_004_pos', 'zpool_upgrade_005_neg', 'zpool_upgrade_006_neg', 'zpool_upgrade_007_pos', 'zpool_upgrade_008_pos', 'zpool_upgrade_009_neg', 'zpool_upgrade_features_001_pos'] tags = ['functional', 'cli_root', 'zpool_upgrade'] [tests/functional/cli_root/zpool_wait] tests = ['zpool_wait_discard', 'zpool_wait_freeing', 'zpool_wait_initialize_basic', 'zpool_wait_initialize_cancel', 'zpool_wait_initialize_flag', 'zpool_wait_multiple', 'zpool_wait_no_activity', 'zpool_wait_remove', 'zpool_wait_remove_cancel', 'zpool_wait_trim_basic', 'zpool_wait_trim_cancel', 'zpool_wait_trim_flag', 'zpool_wait_usage'] tags = ['functional', 'cli_root', 'zpool_wait'] [tests/functional/cli_root/zpool_wait/scan] tests = ['zpool_wait_replace_cancel', 'zpool_wait_rebuild', 'zpool_wait_resilver', 'zpool_wait_scrub_cancel', 'zpool_wait_replace', 'zpool_wait_scrub_basic', 'zpool_wait_scrub_flag'] tags = ['functional', 'cli_root', 'zpool_wait'] [tests/functional/cli_user/misc] tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg', 'zfs_clone_001_neg', 'zfs_create_001_neg', 'zfs_destroy_001_neg', 'zfs_get_001_neg', 'zfs_inherit_001_neg', 'zfs_mount_001_neg', 'zfs_promote_001_neg', 'zfs_receive_001_neg', 'zfs_rename_001_neg', 'zfs_rollback_001_neg', 'zfs_send_001_neg', 'zfs_set_001_neg', 'zfs_share_001_neg', 'zfs_snapshot_001_neg', 'zfs_unallow_001_neg', 'zfs_unmount_001_neg', 'zfs_unshare_001_neg', 'zfs_upgrade_001_neg', 'zpool_001_neg', 'zpool_add_001_neg', 'zpool_attach_001_neg', 'zpool_clear_001_neg', 'zpool_create_001_neg', 'zpool_destroy_001_neg', 'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg', 'zpool_history_001_neg', 'zpool_import_001_neg', 'zpool_import_002_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg', 'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg', 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos', 'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege'] user = tags = ['functional', 'cli_user', 'misc'] [tests/functional/cli_user/zfs_list] tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos', 'zfs_list_004_neg', 'zfs_list_005_neg', 'zfs_list_007_pos', 'zfs_list_008_neg'] user = tags = ['functional', 'cli_user', 'zfs_list'] [tests/functional/cli_user/zpool_iostat] tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos', 'zpool_iostat_003_neg', 'zpool_iostat_004_pos', 'zpool_iostat_005_pos', 'zpool_iostat_-c_disable', 'zpool_iostat_-c_homedir', 'zpool_iostat_-c_searchpath'] user = tags = ['functional', 'cli_user', 'zpool_iostat'] [tests/functional/cli_user/zpool_list] tests = ['zpool_list_001_pos', 'zpool_list_002_neg'] user = tags = ['functional', 'cli_user', 'zpool_list'] [tests/functional/cli_user/zpool_status] tests = ['zpool_status_003_pos', 'zpool_status_-c_disable', 'zpool_status_-c_homedir', 'zpool_status_-c_searchpath'] user = tags = ['functional', 'cli_user', 'zpool_status'] [tests/functional/compression] tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', 'l2arc_compressed_arc', 'l2arc_compressed_arc_disabled', 'l2arc_encrypted', 'l2arc_encrypted_no_compressed_arc'] tags = ['functional', 'compression'] [tests/functional/cp_files] tests = ['cp_files_001_pos'] tags = ['functional', 'cp_files'] [tests/functional/crtime] tests = ['crtime_001_pos' ] tags = ['functional', 'crtime'] [tests/functional/ctime] tests = ['ctime_001_pos' ] tags = ['functional', 'ctime'] [tests/functional/deadman] tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio'] pre = post = tags = ['functional', 'deadman'] [tests/functional/delegate] tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos', 'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos', 'zfs_allow_007_pos', 'zfs_allow_008_pos', 'zfs_allow_009_neg', 'zfs_allow_010_pos', 'zfs_allow_011_neg', 'zfs_allow_012_neg', 'zfs_unallow_001_pos', 'zfs_unallow_002_pos', 'zfs_unallow_003_pos', 'zfs_unallow_004_pos', 'zfs_unallow_005_pos', 'zfs_unallow_006_pos', 'zfs_unallow_007_neg', 'zfs_unallow_008_neg'] tags = ['functional', 'delegate'] [tests/functional/exec] tests = ['exec_001_pos', 'exec_002_neg'] tags = ['functional', 'exec'] [tests/functional/fallocate] tests = ['fallocate_punch-hole'] tags = ['functional', 'fallocate'] [tests/functional/features/async_destroy] tests = ['async_destroy_001_pos'] tags = ['functional', 'features', 'async_destroy'] [tests/functional/features/large_dnode] tests = ['large_dnode_001_pos', 'large_dnode_003_pos', 'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_007_neg', 'large_dnode_009_pos'] tags = ['functional', 'features', 'large_dnode'] [tests/functional/grow] pre = post = tests = ['grow_pool_001_pos', 'grow_replicas_001_pos'] tags = ['functional', 'grow'] [tests/functional/history] tests = ['history_001_pos', 'history_002_pos', 'history_003_pos', 'history_004_pos', 'history_005_neg', 'history_006_neg', 'history_007_pos', 'history_008_pos', 'history_009_pos', 'history_010_pos'] tags = ['functional', 'history'] [tests/functional/hkdf] pre = post = tests = ['hkdf_test'] tags = ['functional', 'hkdf'] [tests/functional/inheritance] tests = ['inherit_001_pos'] pre = tags = ['functional', 'inheritance'] [tests/functional/io] tests = ['sync', 'psync', 'posixaio', 'mmap'] tags = ['functional', 'io'] [tests/functional/inuse] tests = ['inuse_004_pos', 'inuse_005_pos', 'inuse_008_pos', 'inuse_009_pos'] post = tags = ['functional', 'inuse'] [tests/functional/large_files] tests = ['large_files_001_pos', 'large_files_002_pos'] tags = ['functional', 'large_files'] [tests/functional/limits] tests = ['filesystem_count', 'filesystem_limit', 'snapshot_count', 'snapshot_limit'] tags = ['functional', 'limits'] [tests/functional/link_count] tests = ['link_count_001', 'link_count_root_inode'] tags = ['functional', 'link_count'] [tests/functional/migration] tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos', 'migration_004_pos', 'migration_005_pos', 'migration_006_pos', 'migration_007_pos', 'migration_008_pos', 'migration_009_pos', 'migration_010_pos', 'migration_011_pos', 'migration_012_pos'] tags = ['functional', 'migration'] [tests/functional/mmap] tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_seek_001_pos', 'mmap_sync_001_pos'] tags = ['functional', 'mmap'] [tests/functional/mount] tests = ['umount_001', 'umountall_001'] tags = ['functional', 'mount'] [tests/functional/mv_files] tests = ['mv_files_001_pos', 'mv_files_002_pos', 'random_creation'] tags = ['functional', 'mv_files'] [tests/functional/nestedfs] tests = ['nestedfs_001_pos'] tags = ['functional', 'nestedfs'] [tests/functional/no_space] tests = ['enospc_001_pos', 'enospc_002_pos', 'enospc_003_pos', 'enospc_df', 'enospc_rm'] tags = ['functional', 'no_space'] [tests/functional/nopwrite] tests = ['nopwrite_copies', 'nopwrite_mtime', 'nopwrite_negative', 'nopwrite_promoted_clone', 'nopwrite_recsize', 'nopwrite_sync', 'nopwrite_varying_compression', 'nopwrite_volume'] tags = ['functional', 'nopwrite'] [tests/functional/online_offline] tests = ['online_offline_001_pos', 'online_offline_002_neg', 'online_offline_003_neg'] tags = ['functional', 'online_offline'] [tests/functional/pool_checkpoint] tests = ['checkpoint_after_rewind', 'checkpoint_big_rewind', 'checkpoint_capacity', 'checkpoint_conf_change', 'checkpoint_discard', 'checkpoint_discard_busy', 'checkpoint_discard_many', 'checkpoint_indirect', 'checkpoint_invalid', 'checkpoint_lun_expsz', 'checkpoint_open', 'checkpoint_removal', 'checkpoint_rewind', 'checkpoint_ro_rewind', 'checkpoint_sm_scale', 'checkpoint_twice', 'checkpoint_vdev_add', 'checkpoint_zdb', 'checkpoint_zhack_feat'] tags = ['functional', 'pool_checkpoint'] timeout = 1800 [tests/functional/pool_names] tests = ['pool_names_001_pos', 'pool_names_002_neg'] pre = post = tags = ['functional', 'pool_names'] [tests/functional/poolversion] tests = ['poolversion_001_pos', 'poolversion_002_pos'] tags = ['functional', 'poolversion'] [tests/functional/pyzfs] tests = ['pyzfs_unittest'] pre = post = tags = ['functional', 'pyzfs'] [tests/functional/quota] tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos', 'quota_005_pos', 'quota_006_neg'] tags = ['functional', 'quota'] [tests/functional/redacted_send] tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted', 'redacted_disabled_feature', 'redacted_embedded', 'redacted_holes', 'redacted_incrementals', 'redacted_largeblocks', 'redacted_many_clones', 'redacted_mixed_recsize', 'redacted_mounts', 'redacted_negative', 'redacted_origin', 'redacted_panic', 'redacted_props', 'redacted_resume', 'redacted_size', 'redacted_volume'] tags = ['functional', 'redacted_send'] [tests/functional/raidz] tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos'] tags = ['functional', 'raidz'] [tests/functional/redundancy] tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2', 'redundancy_draid3', 'redundancy_draid_damaged1', 'redundancy_draid_damaged2', 'redundancy_draid_spare1', 'redundancy_draid_spare2', 'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz', 'redundancy_raidz1', 'redundancy_raidz2', 'redundancy_raidz3', 'redundancy_stripe'] tags = ['functional', 'redundancy'] timeout = 1200 [tests/functional/refquota] tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos', 'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg', 'refquota_007_neg', 'refquota_008_neg'] tags = ['functional', 'refquota'] [tests/functional/refreserv] tests = ['refreserv_001_pos', 'refreserv_002_pos', 'refreserv_003_pos', 'refreserv_004_pos', 'refreserv_005_pos', 'refreserv_multi_raidz', 'refreserv_raidz'] tags = ['functional', 'refreserv'] [tests/functional/removal] pre = tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space', 'removal_condense_export', 'removal_multiple_indirection', 'removal_nopwrite', 'removal_remap_deadlists', 'removal_resume_export', 'removal_sanity', 'removal_with_add', 'removal_with_create_fs', 'removal_with_dedup', 'removal_with_errors', 'removal_with_export', 'removal_with_ganging', 'removal_with_faulted', 'removal_with_remove', 'removal_with_scrub', 'removal_with_send', 'removal_with_send_recv', 'removal_with_snapshot', 'removal_with_write', 'removal_with_zdb', 'remove_expanded', 'remove_mirror', 'remove_mirror_sanity', 'remove_raidz', 'remove_indirect', 'remove_attach_mirror'] tags = ['functional', 'removal'] [tests/functional/rename_dirs] tests = ['rename_dirs_001_pos'] tags = ['functional', 'rename_dirs'] [tests/functional/replacement] tests = ['attach_import', 'attach_multiple', 'attach_rebuild', 'attach_resilver', 'detach', 'rebuild_disabled_feature', 'rebuild_multiple', 'rebuild_raidz', 'replace_import', 'replace_rebuild', 'replace_resilver', 'resilver_restart_001', 'resilver_restart_002', 'scrub_cancel'] tags = ['functional', 'replacement'] [tests/functional/reservation] tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos', 'reservation_004_pos', 'reservation_005_pos', 'reservation_006_pos', 'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos', 'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos', 'reservation_013_pos', 'reservation_014_pos', 'reservation_015_pos', 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos', 'reservation_019_pos', 'reservation_020_pos', 'reservation_021_neg', 'reservation_022_pos'] tags = ['functional', 'reservation'] [tests/functional/rootpool] tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos'] tags = ['functional', 'rootpool'] [tests/functional/rsend] tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'rsend_005_pos', 'rsend_006_pos', 'rsend_007_pos', 'rsend_008_pos', 'rsend_009_pos', 'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos', 'rsend_013_pos', 'rsend_014_pos', 'rsend_016_neg', 'rsend_019_pos', 'rsend_020_pos', 'rsend_021_pos', 'rsend_022_pos', 'rsend_024_pos', 'rsend_025_pos', 'rsend_026_neg', 'rsend_027_pos', 'rsend_028_neg', 'rsend_029_neg', 'send-c_verify_ratio', 'send-c_verify_contents', 'send-c_props', 'send-c_incremental', 'send-c_volume', 'send-c_zstreamdump', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', 'send-c_recv_dedup', 'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props', 'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files', 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds', 'send_hole_birth', 'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset', 'send_invalid', 'send_doall', 'send_raw_spill_block', 'send_raw_ashift'] tags = ['functional', 'rsend'] [tests/functional/scrub_mirror] tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', 'scrub_mirror_003_pos', 'scrub_mirror_004_pos'] tags = ['functional', 'scrub_mirror'] [tests/functional/slog] tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001', 'slog_replay_fs_002', 'slog_replay_volume', 'slog_016_pos'] tags = ['functional', 'slog'] [tests/functional/snapshot] tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos', 'rollback_003_pos', 'snapshot_001_pos', 'snapshot_002_pos', 'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos', 'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos', 'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos', 'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos', 'snapshot_017_pos', 'snapshot_018_pos'] tags = ['functional', 'snapshot'] [tests/functional/snapused] tests = ['snapused_001_pos', 'snapused_002_pos', 'snapused_003_pos', 'snapused_004_pos', 'snapused_005_pos'] tags = ['functional', 'snapused'] [tests/functional/sparse] tests = ['sparse_001_pos'] tags = ['functional', 'sparse'] [tests/functional/stat] tests = ['stat_001_pos'] tags = ['functional', 'stat'] [tests/functional/suid] tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid', 'suid_write_to_none', 'suid_write_zil_replay'] tags = ['functional', 'suid'] [tests/functional/trim] tests = ['autotrim_integrity', 'autotrim_config', 'autotrim_trim_integrity', 'trim_integrity', 'trim_config', 'trim_l2arc'] tags = ['functional', 'trim'] [tests/functional/truncate] tests = ['truncate_001_pos', 'truncate_002_pos', 'truncate_timestamps'] tags = ['functional', 'truncate'] [tests/functional/upgrade] tests = ['upgrade_userobj_001_pos', 'upgrade_readonly_pool'] tags = ['functional', 'upgrade'] [tests/functional/userquota] tests = [ 'userquota_001_pos', 'userquota_002_pos', 'userquota_003_pos', 'userquota_004_pos', 'userquota_005_neg', 'userquota_006_pos', 'userquota_007_pos', 'userquota_008_pos', 'userquota_009_pos', 'userquota_010_pos', 'userquota_011_pos', 'userquota_012_neg', 'userspace_001_pos', 'userspace_002_pos', 'userspace_encrypted', 'userspace_send_encrypted'] tags = ['functional', 'userquota'] [tests/functional/vdev_zaps] tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos', 'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos', 'vdev_zaps_007_pos'] tags = ['functional', 'vdev_zaps'] [tests/functional/write_dirs] tests = ['write_dirs_001_pos', 'write_dirs_002_pos'] tags = ['functional', 'write_dirs'] [tests/functional/xattr] tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos', 'xattr_005_pos', 'xattr_006_pos', 'xattr_007_neg', 'xattr_011_pos', 'xattr_012_pos', 'xattr_013_pos', 'xattr_compat'] tags = ['functional', 'xattr'] [tests/functional/zvol/zvol_ENOSPC] tests = ['zvol_ENOSPC_001_pos'] tags = ['functional', 'zvol', 'zvol_ENOSPC'] [tests/functional/zvol/zvol_cli] tests = ['zvol_cli_001_pos', 'zvol_cli_002_pos', 'zvol_cli_003_neg'] tags = ['functional', 'zvol', 'zvol_cli'] [tests/functional/zvol/zvol_misc] tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse', 'zvol_misc_snapdev', 'zvol_misc_trim', 'zvol_misc_volmode', 'zvol_misc_zil'] tags = ['functional', 'zvol', 'zvol_misc'] [tests/functional/zvol/zvol_stress] tests = ['zvol_stress'] tags = ['functional', 'zvol', 'zvol_stress'] [tests/functional/zvol/zvol_swap] tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos'] tags = ['functional', 'zvol', 'zvol_swap'] [tests/functional/libzfs] tests = ['many_fds', 'libzfs_input'] tags = ['functional', 'libzfs'] [tests/functional/log_spacemap] tests = ['log_spacemap_import_logs'] pre = post = tags = ['functional', 'log_spacemap'] [tests/functional/l2arc] tests = ['l2arc_arcstats_pos', 'l2arc_mfuonly_pos', 'l2arc_l2miss_pos', 'persist_l2arc_001_pos', 'persist_l2arc_002_pos', 'persist_l2arc_003_neg', 'persist_l2arc_004_pos', 'persist_l2arc_005_pos'] tags = ['functional', 'l2arc'] [tests/functional/zpool_influxdb] tests = ['zpool_influxdb'] tags = ['functional', 'zpool_influxdb'] diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index d65788d0868b..4a815db8a6d6 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -1,1993 +1,1994 @@ CLEANFILES = dist_noinst_DATA = include $(top_srcdir)/config/Substfiles.am datadir_zfs_tests_testsdir = $(datadir)/$(PACKAGE)/zfs-tests/tests nobase_dist_datadir_zfs_tests_tests_DATA = \ perf/nfs-sample.cfg \ perf/perf.shlib \ \ perf/fio/mkfiles.fio \ perf/fio/random_reads.fio \ perf/fio/random_readwrite.fio \ perf/fio/random_readwrite_fixed.fio \ perf/fio/random_writes.fio \ perf/fio/sequential_reads.fio \ perf/fio/sequential_readwrite.fio \ perf/fio/sequential_writes.fio nobase_dist_datadir_zfs_tests_tests_SCRIPTS = \ perf/regression/random_reads.ksh \ perf/regression/random_readwrite.ksh \ perf/regression/random_readwrite_fixed.ksh \ perf/regression/random_writes.ksh \ perf/regression/random_writes_zil.ksh \ perf/regression/sequential_reads_arc_cached_clone.ksh \ perf/regression/sequential_reads_arc_cached.ksh \ perf/regression/sequential_reads_dbuf_cached.ksh \ perf/regression/sequential_reads.ksh \ perf/regression/sequential_writes.ksh \ perf/regression/setup.ksh \ \ perf/scripts/prefetch_io.sh # These lists can be regenerated by running make regen-tests at the root, or, on a *clean* source: # find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' ! -executable -name '*.in' | sort | sed 's/\.in$//;s/^/\t/;$!s/$/ \\/' # find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' -executable -name '*.in' | sort | sed 's/\.in$//;s/^/\t/;$!s/$/ \\/' # find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' ! -name '*.in' ! -name '*.c' | grep -Fe /simd -e /tmpfile | sort | sed 's/^/\t/;$!s/$/ \\/' # find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' ! -executable ! -name '*.in' ! -name '*.c' | grep -vFe /simd -e /tmpfile | sort | sed 's/^/\t/;$!s/$/ \\/' # find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' -executable ! -name '*.in' ! -name '*.c' | grep -vFe /simd -e /tmpfile | sort | sed 's/^/\t/;$!s/$/ \\/' # # simd and tmpfile are Linux-only and not installed elsewhere # # C programs are specced in ../Makefile.am above as part of the main Makefile find_common := find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' regen: @$(MAKE) -C $(top_builddir) clean @$(MAKE) clean $(SED) $(ac_inplace) '/^# -- >8 --/q' Makefile.am echo >> Makefile.am echo 'nobase_nodist_datadir_zfs_tests_tests_DATA = \' >> Makefile.am $(find_common) ! -executable -name '*.in' | sort | sed 's/\.in$$//;s/^/\t/;$$!s/$$/ \\/' >> Makefile.am echo 'nobase_nodist_datadir_zfs_tests_tests_SCRIPTS = \' >> Makefile.am $(find_common) -executable -name '*.in' | sort | sed 's/\.in$$//;s/^/\t/;$$!s/$$/ \\/' >> Makefile.am echo >> Makefile.am echo 'SUBSTFILES += $$(nobase_nodist_datadir_zfs_tests_tests_DATA) $$(nobase_nodist_datadir_zfs_tests_tests_SCRIPTS)' >> Makefile.am echo >> Makefile.am echo 'if BUILD_LINUX' >> Makefile.am echo 'nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \' >> Makefile.am $(find_common) ! -name '*.in' ! -name '*.c' | grep -Fe /simd -e /tmpfile | sort | sed 's/^/\t/;$$!s/$$/ \\/' >> Makefile.am echo 'endif' >> Makefile.am echo >> Makefile.am echo 'nobase_dist_datadir_zfs_tests_tests_DATA += \' >> Makefile.am $(find_common) ! -executable ! -name '*.in' ! -name '*.c' | grep -vFe /simd -e /tmpfile | sort | sed 's/^/\t/;$$!s/$$/ \\/' >> Makefile.am echo >> Makefile.am echo 'nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \' >> Makefile.am $(find_common) -executable ! -name '*.in' ! -name '*.c' | grep -vFe /simd -e /tmpfile | sort | sed 's/^/\t/;$$!s/$$/ \\/' >> Makefile.am # -- >8 -- nobase_nodist_datadir_zfs_tests_tests_DATA = \ functional/pam/utilities.kshlib nobase_nodist_datadir_zfs_tests_tests_SCRIPTS = \ functional/pyzfs/pyzfs_unittest.ksh SUBSTFILES += $(nobase_nodist_datadir_zfs_tests_tests_DATA) $(nobase_nodist_datadir_zfs_tests_tests_SCRIPTS) if BUILD_LINUX nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/simd/simd_supported.ksh \ functional/tmpfile/cleanup.ksh \ functional/tmpfile/setup.ksh endif nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/acl/acl.cfg \ functional/acl/acl_common.kshlib \ functional/alloc_class/alloc_class.cfg \ functional/alloc_class/alloc_class.kshlib \ functional/atime/atime.cfg \ functional/atime/atime_common.kshlib \ functional/cache/cache.cfg \ functional/cache/cache.kshlib \ functional/cachefile/cachefile.cfg \ functional/cachefile/cachefile.kshlib \ functional/casenorm/casenorm.cfg \ functional/casenorm/casenorm.kshlib \ functional/channel_program/channel_common.kshlib \ functional/channel_program/lua_core/tst.args_to_lua.out \ functional/channel_program/lua_core/tst.args_to_lua.zcp \ functional/channel_program/lua_core/tst.divide_by_zero.err \ functional/channel_program/lua_core/tst.divide_by_zero.zcp \ functional/channel_program/lua_core/tst.exists.zcp \ functional/channel_program/lua_core/tst.large_prog.out \ functional/channel_program/lua_core/tst.large_prog.zcp \ functional/channel_program/lua_core/tst.lib_base.lua \ functional/channel_program/lua_core/tst.lib_coroutine.lua \ functional/channel_program/lua_core/tst.lib_strings.lua \ functional/channel_program/lua_core/tst.lib_table.lua \ functional/channel_program/lua_core/tst.nested_neg.zcp \ functional/channel_program/lua_core/tst.nested_pos.zcp \ functional/channel_program/lua_core/tst.recursive.zcp \ functional/channel_program/lua_core/tst.return_large.zcp \ functional/channel_program/lua_core/tst.return_recursive_table.zcp \ functional/channel_program/lua_core/tst.stack_gsub.err \ functional/channel_program/lua_core/tst.stack_gsub.zcp \ functional/channel_program/lua_core/tst.timeout.zcp \ functional/channel_program/synctask_core/tst.bookmark.copy.zcp \ functional/channel_program/synctask_core/tst.bookmark.create.zcp \ functional/channel_program/synctask_core/tst.get_index_props.out \ functional/channel_program/synctask_core/tst.get_index_props.zcp \ functional/channel_program/synctask_core/tst.get_number_props.out \ functional/channel_program/synctask_core/tst.get_number_props.zcp \ functional/channel_program/synctask_core/tst.get_string_props.out \ functional/channel_program/synctask_core/tst.get_string_props.zcp \ functional/channel_program/synctask_core/tst.promote_conflict.zcp \ functional/channel_program/synctask_core/tst.set_props.zcp \ functional/channel_program/synctask_core/tst.snapshot_destroy.zcp \ functional/channel_program/synctask_core/tst.snapshot_neg.zcp \ functional/channel_program/synctask_core/tst.snapshot_recursive.zcp \ functional/channel_program/synctask_core/tst.snapshot_simple.zcp \ functional/checksum/default.cfg \ functional/clean_mirror/clean_mirror_common.kshlib \ functional/clean_mirror/default.cfg \ functional/cli_root/cli_common.kshlib \ functional/cli_root/zfs_copies/zfs_copies.cfg \ functional/cli_root/zfs_copies/zfs_copies.kshlib \ functional/cli_root/zfs_create/properties.kshlib \ functional/cli_root/zfs_create/zfs_create.cfg \ functional/cli_root/zfs_create/zfs_create_common.kshlib \ functional/cli_root/zfs_destroy/zfs_destroy.cfg \ functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib \ functional/cli_root/zfs_get/zfs_get_common.kshlib \ functional/cli_root/zfs_get/zfs_get_list_d.kshlib \ functional/cli_root/zfs_jail/jail.conf \ functional/cli_root/zfs_load-key/HEXKEY \ functional/cli_root/zfs_load-key/PASSPHRASE \ functional/cli_root/zfs_load-key/RAWKEY \ functional/cli_root/zfs_load-key/zfs_load-key.cfg \ functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib \ functional/cli_root/zfs_mount/zfs_mount.cfg \ functional/cli_root/zfs_mount/zfs_mount.kshlib \ functional/cli_root/zfs_promote/zfs_promote.cfg \ functional/cli_root/zfs_receive/zstd_test_data.txt \ functional/cli_root/zfs_rename/zfs_rename.cfg \ functional/cli_root/zfs_rename/zfs_rename.kshlib \ functional/cli_root/zfs_rollback/zfs_rollback.cfg \ functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib \ functional/cli_root/zfs_send/zfs_send.cfg \ functional/cli_root/zfs_set/zfs_set_common.kshlib \ functional/cli_root/zfs_share/zfs_share.cfg \ functional/cli_root/zfs_snapshot/zfs_snapshot.cfg \ functional/cli_root/zfs_unmount/zfs_unmount.cfg \ functional/cli_root/zfs_unmount/zfs_unmount.kshlib \ functional/cli_root/zfs_upgrade/zfs_upgrade.kshlib \ functional/cli_root/zfs_wait/zfs_wait.kshlib \ functional/cli_root/zpool_add/zpool_add.cfg \ functional/cli_root/zpool_add/zpool_add.kshlib \ functional/cli_root/zpool_clear/zpool_clear.cfg \ functional/cli_root/zpool_create/draidcfg.gz \ functional/cli_root/zpool_create/zpool_create.cfg \ functional/cli_root/zpool_create/zpool_create.shlib \ functional/cli_root/zpool_destroy/zpool_destroy.cfg \ functional/cli_root/zpool_events/zpool_events.cfg \ functional/cli_root/zpool_events/zpool_events.kshlib \ functional/cli_root/zpool_expand/zpool_expand.cfg \ functional/cli_root/zpool_export/zpool_export.cfg \ functional/cli_root/zpool_export/zpool_export.kshlib \ functional/cli_root/zpool_get/zpool_get.cfg \ functional/cli_root/zpool_get/zpool_get_parsable.cfg \ functional/cli_root/zpool_import/blockfiles/cryptv0.dat.bz2 \ functional/cli_root/zpool_import/blockfiles/missing_ivset.dat.bz2 \ functional/cli_root/zpool_import/blockfiles/unclean_export.dat.bz2 \ functional/cli_root/zpool_import/zpool_import.cfg \ functional/cli_root/zpool_import/zpool_import.kshlib \ functional/cli_root/zpool_initialize/zpool_initialize.kshlib \ functional/cli_root/zpool_labelclear/labelclear.cfg \ functional/cli_root/zpool_remove/zpool_remove.cfg \ functional/cli_root/zpool_reopen/zpool_reopen.cfg \ functional/cli_root/zpool_reopen/zpool_reopen.shlib \ functional/cli_root/zpool_resilver/zpool_resilver.cfg \ functional/cli_root/zpool_scrub/zpool_scrub.cfg \ functional/cli_root/zpool_split/zpool_split.cfg \ functional/cli_root/zpool_trim/zpool_trim.kshlib \ functional/cli_root/zpool_upgrade/blockfiles/zfs-broken-mirror1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-broken-mirror2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v10.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v11.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v12.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v13.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v14.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v15.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1mirror1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1mirror2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1mirror3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1raidz1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1raidz2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1raidz3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1stripe1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1stripe2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v1stripe3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2mirror1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2mirror2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2mirror3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2raidz1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2raidz2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2raidz3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2stripe1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2stripe2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v2stripe3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3hotspare1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3hotspare2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3hotspare3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3mirror1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3mirror2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3mirror3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3raidz1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3raidz21.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3raidz22.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3raidz23.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3raidz2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3raidz3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3stripe1.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3stripe2.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v3stripe3.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v4.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v5.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v6.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v7.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v8.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v999.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-v9.dat.bz2 \ functional/cli_root/zpool_upgrade/blockfiles/zfs-pool-vBROKEN.dat.bz2 \ functional/cli_root/zpool_upgrade/zpool_upgrade.cfg \ functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib \ functional/cli_root/zpool_wait/zpool_wait.kshlib \ functional/cli_user/misc/misc.cfg \ functional/cli_user/zfs_list/zfs_list.cfg \ functional/cli_user/zfs_list/zfs_list.kshlib \ functional/compression/compress.cfg \ functional/compression/testpool_zstd.tar.gz \ functional/deadman/deadman.cfg \ functional/delegate/delegate.cfg \ functional/delegate/delegate_common.kshlib \ functional/devices/devices.cfg \ functional/devices/devices_common.kshlib \ functional/events/events.cfg \ functional/events/events_common.kshlib \ functional/fault/fault.cfg \ functional/grow/grow.cfg \ functional/history/history.cfg \ functional/history/history_common.kshlib \ functional/history/i386.migratedpool.DAT.Z \ functional/history/i386.orig_history.txt \ functional/history/sparc.migratedpool.DAT.Z \ functional/history/sparc.orig_history.txt \ functional/history/zfs-pool-v4.dat.Z \ functional/inheritance/config001.cfg \ functional/inheritance/config002.cfg \ functional/inheritance/config003.cfg \ functional/inheritance/config004.cfg \ functional/inheritance/config005.cfg \ functional/inheritance/config006.cfg \ functional/inheritance/config007.cfg \ functional/inheritance/config008.cfg \ functional/inheritance/config009.cfg \ functional/inheritance/config010.cfg \ functional/inheritance/config011.cfg \ functional/inheritance/config012.cfg \ functional/inheritance/config013.cfg \ functional/inheritance/config014.cfg \ functional/inheritance/config015.cfg \ functional/inheritance/config016.cfg \ functional/inheritance/config017.cfg \ functional/inheritance/config018.cfg \ functional/inheritance/config019.cfg \ functional/inheritance/config020.cfg \ functional/inheritance/config021.cfg \ functional/inheritance/config022.cfg \ functional/inheritance/config023.cfg \ functional/inheritance/config024.cfg \ functional/inheritance/inherit.kshlib \ functional/inheritance/README.config \ functional/inheritance/README.state \ functional/inheritance/state001.cfg \ functional/inheritance/state002.cfg \ functional/inheritance/state003.cfg \ functional/inheritance/state004.cfg \ functional/inheritance/state005.cfg \ functional/inheritance/state006.cfg \ functional/inheritance/state007.cfg \ functional/inheritance/state008.cfg \ functional/inheritance/state009.cfg \ functional/inheritance/state010.cfg \ functional/inheritance/state011.cfg \ functional/inheritance/state012.cfg \ functional/inheritance/state013.cfg \ functional/inheritance/state014.cfg \ functional/inheritance/state015.cfg \ functional/inheritance/state016.cfg \ functional/inheritance/state017.cfg \ functional/inheritance/state018.cfg \ functional/inheritance/state019.cfg \ functional/inheritance/state020.cfg \ functional/inheritance/state021.cfg \ functional/inheritance/state022.cfg \ functional/inheritance/state023.cfg \ functional/inheritance/state024.cfg \ functional/inuse/inuse.cfg \ functional/io/io.cfg \ functional/l2arc/l2arc.cfg \ functional/largest_pool/largest_pool.cfg \ functional/migration/migration.cfg \ functional/migration/migration.kshlib \ functional/mmap/mmap.cfg \ functional/mmp/mmp.cfg \ functional/mmp/mmp.kshlib \ functional/mv_files/mv_files.cfg \ functional/mv_files/mv_files_common.kshlib \ functional/nopwrite/nopwrite.shlib \ functional/no_space/enospc.cfg \ functional/online_offline/online_offline.cfg \ functional/pool_checkpoint/pool_checkpoint.kshlib \ functional/projectquota/projectquota.cfg \ functional/projectquota/projectquota_common.kshlib \ functional/quota/quota.cfg \ functional/quota/quota.kshlib \ functional/redacted_send/redacted.cfg \ functional/redacted_send/redacted.kshlib \ functional/redundancy/redundancy.cfg \ functional/redundancy/redundancy.kshlib \ functional/refreserv/refreserv.cfg \ functional/removal/removal.kshlib \ functional/replacement/replacement.cfg \ functional/reservation/reservation.cfg \ functional/reservation/reservation.shlib \ functional/rsend/dedup_encrypted_zvol.bz2 \ functional/rsend/dedup_encrypted_zvol.zsend.bz2 \ functional/rsend/dedup.zsend.bz2 \ functional/rsend/fs.tar.gz \ functional/rsend/rsend.cfg \ functional/rsend/rsend.kshlib \ functional/scrub_mirror/default.cfg \ functional/scrub_mirror/scrub_mirror_common.kshlib \ functional/slog/slog.cfg \ functional/slog/slog.kshlib \ functional/snapshot/snapshot.cfg \ functional/snapused/snapused.kshlib \ functional/sparse/sparse.cfg \ functional/trim/trim.cfg \ functional/trim/trim.kshlib \ functional/truncate/truncate.cfg \ functional/upgrade/upgrade_common.kshlib \ functional/user_namespace/user_namespace.cfg \ functional/user_namespace/user_namespace_common.kshlib \ functional/userquota/userquota.cfg \ functional/userquota/userquota_common.kshlib \ functional/vdev_zaps/vdev_zaps.kshlib \ functional/xattr/xattr.cfg \ functional/xattr/xattr_common.kshlib \ functional/zvol/zvol.cfg \ functional/zvol/zvol_cli/zvol_cli.cfg \ functional/zvol/zvol_common.shlib \ functional/zvol/zvol_ENOSPC/zvol_ENOSPC.cfg \ functional/zvol/zvol_misc/zvol_misc_common.kshlib \ functional/zvol/zvol_swap/zvol_swap.cfg nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/acl/off/cleanup.ksh \ functional/acl/off/dosmode.ksh \ functional/acl/off/posixmode.ksh \ functional/acl/off/setup.ksh \ functional/acl/posix/cleanup.ksh \ functional/acl/posix/posix_001_pos.ksh \ functional/acl/posix/posix_002_pos.ksh \ functional/acl/posix/posix_003_pos.ksh \ functional/acl/posix/posix_004_pos.ksh \ functional/acl/posix-sa/cleanup.ksh \ functional/acl/posix-sa/posix_001_pos.ksh \ functional/acl/posix-sa/posix_002_pos.ksh \ functional/acl/posix-sa/posix_003_pos.ksh \ functional/acl/posix-sa/posix_004_pos.ksh \ functional/acl/posix-sa/setup.ksh \ functional/acl/posix/setup.ksh \ functional/alloc_class/alloc_class_001_pos.ksh \ functional/alloc_class/alloc_class_002_neg.ksh \ functional/alloc_class/alloc_class_003_pos.ksh \ functional/alloc_class/alloc_class_004_pos.ksh \ functional/alloc_class/alloc_class_005_pos.ksh \ functional/alloc_class/alloc_class_006_pos.ksh \ functional/alloc_class/alloc_class_007_pos.ksh \ functional/alloc_class/alloc_class_008_pos.ksh \ functional/alloc_class/alloc_class_009_pos.ksh \ functional/alloc_class/alloc_class_010_pos.ksh \ functional/alloc_class/alloc_class_011_neg.ksh \ functional/alloc_class/alloc_class_012_pos.ksh \ functional/alloc_class/alloc_class_013_pos.ksh \ functional/alloc_class/cleanup.ksh \ functional/alloc_class/setup.ksh \ functional/append/file_append.ksh \ functional/append/threadsappend_001_pos.ksh \ functional/append/cleanup.ksh \ functional/append/setup.ksh \ functional/arc/arcstats_runtime_tuning.ksh \ functional/arc/cleanup.ksh \ functional/arc/dbufstats_001_pos.ksh \ functional/arc/dbufstats_002_pos.ksh \ functional/arc/dbufstats_003_pos.ksh \ functional/arc/setup.ksh \ functional/atime/atime_001_pos.ksh \ functional/atime/atime_002_neg.ksh \ functional/atime/atime_003_pos.ksh \ functional/atime/cleanup.ksh \ functional/atime/root_atime_off.ksh \ functional/atime/root_atime_on.ksh \ functional/atime/root_relatime_on.ksh \ functional/atime/setup.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ functional/bootfs/bootfs_004_neg.ksh \ functional/bootfs/bootfs_005_neg.ksh \ functional/bootfs/bootfs_006_pos.ksh \ functional/bootfs/bootfs_007_pos.ksh \ functional/bootfs/bootfs_008_pos.ksh \ functional/bootfs/cleanup.ksh \ functional/bootfs/setup.ksh \ functional/btree/btree_negative.ksh \ functional/btree/btree_positive.ksh \ functional/cache/cache_001_pos.ksh \ functional/cache/cache_002_pos.ksh \ functional/cache/cache_003_pos.ksh \ functional/cache/cache_004_neg.ksh \ functional/cache/cache_005_neg.ksh \ functional/cache/cache_006_pos.ksh \ functional/cache/cache_007_neg.ksh \ functional/cache/cache_008_neg.ksh \ functional/cache/cache_009_pos.ksh \ functional/cache/cache_010_pos.ksh \ functional/cache/cache_011_pos.ksh \ functional/cache/cache_012_pos.ksh \ functional/cache/cleanup.ksh \ functional/cachefile/cachefile_001_pos.ksh \ functional/cachefile/cachefile_002_pos.ksh \ functional/cachefile/cachefile_003_pos.ksh \ functional/cachefile/cachefile_004_pos.ksh \ functional/cachefile/cleanup.ksh \ functional/cachefile/setup.ksh \ functional/cache/setup.ksh \ functional/casenorm/case_all_values.ksh \ functional/casenorm/cleanup.ksh \ functional/casenorm/insensitive_formd_delete.ksh \ functional/casenorm/insensitive_formd_lookup.ksh \ functional/casenorm/insensitive_none_delete.ksh \ functional/casenorm/insensitive_none_lookup.ksh \ functional/casenorm/mixed_create_failure.ksh \ functional/casenorm/mixed_formd_delete.ksh \ functional/casenorm/mixed_formd_lookup_ci.ksh \ functional/casenorm/mixed_formd_lookup.ksh \ functional/casenorm/mixed_none_delete.ksh \ functional/casenorm/mixed_none_lookup_ci.ksh \ functional/casenorm/mixed_none_lookup.ksh \ functional/casenorm/norm_all_values.ksh \ functional/casenorm/sensitive_formd_delete.ksh \ functional/casenorm/sensitive_formd_lookup.ksh \ functional/casenorm/sensitive_none_delete.ksh \ functional/casenorm/sensitive_none_lookup.ksh \ functional/casenorm/setup.ksh \ functional/channel_program/lua_core/cleanup.ksh \ functional/channel_program/lua_core/setup.ksh \ functional/channel_program/lua_core/tst.args_to_lua.ksh \ functional/channel_program/lua_core/tst.divide_by_zero.ksh \ functional/channel_program/lua_core/tst.exists.ksh \ functional/channel_program/lua_core/tst.integer_illegal.ksh \ functional/channel_program/lua_core/tst.integer_overflow.ksh \ functional/channel_program/lua_core/tst.language_functions_neg.ksh \ functional/channel_program/lua_core/tst.language_functions_pos.ksh \ functional/channel_program/lua_core/tst.large_prog.ksh \ functional/channel_program/lua_core/tst.libraries.ksh \ functional/channel_program/lua_core/tst.memory_limit.ksh \ functional/channel_program/lua_core/tst.nested_neg.ksh \ functional/channel_program/lua_core/tst.nested_pos.ksh \ functional/channel_program/lua_core/tst.nvlist_to_lua.ksh \ functional/channel_program/lua_core/tst.recursive_neg.ksh \ functional/channel_program/lua_core/tst.recursive_pos.ksh \ functional/channel_program/lua_core/tst.return_large.ksh \ functional/channel_program/lua_core/tst.return_nvlist_neg.ksh \ functional/channel_program/lua_core/tst.return_nvlist_pos.ksh \ functional/channel_program/lua_core/tst.return_recursive_table.ksh \ functional/channel_program/lua_core/tst.stack_gsub.ksh \ functional/channel_program/lua_core/tst.timeout.ksh \ functional/channel_program/synctask_core/cleanup.ksh \ functional/channel_program/synctask_core/setup.ksh \ functional/channel_program/synctask_core/tst.bookmark.copy.ksh \ functional/channel_program/synctask_core/tst.bookmark.create.ksh \ functional/channel_program/synctask_core/tst.destroy_fs.ksh \ functional/channel_program/synctask_core/tst.destroy_snap.ksh \ functional/channel_program/synctask_core/tst.get_count_and_limit.ksh \ functional/channel_program/synctask_core/tst.get_index_props.ksh \ functional/channel_program/synctask_core/tst.get_mountpoint.ksh \ functional/channel_program/synctask_core/tst.get_neg.ksh \ functional/channel_program/synctask_core/tst.get_number_props.ksh \ functional/channel_program/synctask_core/tst.get_string_props.ksh \ functional/channel_program/synctask_core/tst.get_type.ksh \ functional/channel_program/synctask_core/tst.get_userquota.ksh \ functional/channel_program/synctask_core/tst.get_written.ksh \ functional/channel_program/synctask_core/tst.inherit.ksh \ functional/channel_program/synctask_core/tst.list_bookmarks.ksh \ functional/channel_program/synctask_core/tst.list_children.ksh \ functional/channel_program/synctask_core/tst.list_clones.ksh \ functional/channel_program/synctask_core/tst.list_holds.ksh \ functional/channel_program/synctask_core/tst.list_snapshots.ksh \ functional/channel_program/synctask_core/tst.list_system_props.ksh \ functional/channel_program/synctask_core/tst.list_user_props.ksh \ functional/channel_program/synctask_core/tst.parse_args_neg.ksh \ functional/channel_program/synctask_core/tst.promote_conflict.ksh \ functional/channel_program/synctask_core/tst.promote_multiple.ksh \ functional/channel_program/synctask_core/tst.promote_simple.ksh \ functional/channel_program/synctask_core/tst.rollback_mult.ksh \ functional/channel_program/synctask_core/tst.rollback_one.ksh \ functional/channel_program/synctask_core/tst.set_props.ksh \ functional/channel_program/synctask_core/tst.snapshot_destroy.ksh \ functional/channel_program/synctask_core/tst.snapshot_neg.ksh \ functional/channel_program/synctask_core/tst.snapshot_recursive.ksh \ functional/channel_program/synctask_core/tst.snapshot_simple.ksh \ functional/channel_program/synctask_core/tst.terminate_by_signal.ksh \ functional/chattr/chattr_001_pos.ksh \ functional/chattr/chattr_002_neg.ksh \ functional/chattr/cleanup.ksh \ functional/chattr/setup.ksh \ functional/checksum/cleanup.ksh \ functional/checksum/filetest_001_pos.ksh \ functional/checksum/filetest_002_pos.ksh \ functional/checksum/run_blake3_test.ksh \ functional/checksum/run_edonr_test.ksh \ functional/checksum/run_sha2_test.ksh \ functional/checksum/run_skein_test.ksh \ functional/checksum/setup.ksh \ functional/clean_mirror/clean_mirror_001_pos.ksh \ functional/clean_mirror/clean_mirror_002_pos.ksh \ functional/clean_mirror/clean_mirror_003_pos.ksh \ functional/clean_mirror/clean_mirror_004_pos.ksh \ functional/clean_mirror/cleanup.ksh \ functional/clean_mirror/setup.ksh \ functional/cli_root/zdb/zdb_002_pos.ksh \ functional/cli_root/zdb/zdb_003_pos.ksh \ functional/cli_root/zdb/zdb_004_pos.ksh \ functional/cli_root/zdb/zdb_005_pos.ksh \ functional/cli_root/zdb/zdb_006_pos.ksh \ functional/cli_root/zdb/zdb_args_neg.ksh \ functional/cli_root/zdb/zdb_args_pos.ksh \ functional/cli_root/zdb/zdb_block_size_histogram.ksh \ functional/cli_root/zdb/zdb_checksum.ksh \ functional/cli_root/zdb/zdb_decompress.ksh \ functional/cli_root/zdb/zdb_decompress_zstd.ksh \ functional/cli_root/zdb/zdb_display_block.ksh \ functional/cli_root/zdb/zdb_label_checksum.ksh \ functional/cli_root/zdb/zdb_object_range_neg.ksh \ functional/cli_root/zdb/zdb_object_range_pos.ksh \ functional/cli_root/zdb/zdb_objset_id.ksh \ functional/cli_root/zdb/zdb_recover_2.ksh \ functional/cli_root/zdb/zdb_recover.ksh \ functional/cli_root/zfs_bookmark/cleanup.ksh \ functional/cli_root/zfs_bookmark/setup.ksh \ functional/cli_root/zfs_bookmark/zfs_bookmark_cliargs.ksh \ functional/cli_root/zfs_change-key/cleanup.ksh \ functional/cli_root/zfs_change-key/setup.ksh \ functional/cli_root/zfs_change-key/zfs_change-key_child.ksh \ functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh \ functional/cli_root/zfs_change-key/zfs_change-key_format.ksh \ functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh \ functional/cli_root/zfs_change-key/zfs_change-key.ksh \ functional/cli_root/zfs_change-key/zfs_change-key_load.ksh \ functional/cli_root/zfs_change-key/zfs_change-key_location.ksh \ functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh \ functional/cli_root/zfs/cleanup.ksh \ functional/cli_root/zfs_clone/cleanup.ksh \ functional/cli_root/zfs_clone/setup.ksh \ functional/cli_root/zfs_clone/zfs_clone_001_neg.ksh \ functional/cli_root/zfs_clone/zfs_clone_002_pos.ksh \ functional/cli_root/zfs_clone/zfs_clone_003_pos.ksh \ functional/cli_root/zfs_clone/zfs_clone_004_pos.ksh \ functional/cli_root/zfs_clone/zfs_clone_005_pos.ksh \ functional/cli_root/zfs_clone/zfs_clone_006_pos.ksh \ functional/cli_root/zfs_clone/zfs_clone_007_pos.ksh \ functional/cli_root/zfs_clone/zfs_clone_008_neg.ksh \ functional/cli_root/zfs_clone/zfs_clone_009_neg.ksh \ functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh \ functional/cli_root/zfs_clone/zfs_clone_deeply_nested.ksh \ functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh \ functional/cli_root/zfs_clone/zfs_clone_rm_nested.ksh \ functional/cli_root/zfs_copies/cleanup.ksh \ functional/cli_root/zfs_copies/setup.ksh \ functional/cli_root/zfs_copies/zfs_copies_001_pos.ksh \ functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh \ functional/cli_root/zfs_copies/zfs_copies_003_pos.ksh \ functional/cli_root/zfs_copies/zfs_copies_004_neg.ksh \ functional/cli_root/zfs_copies/zfs_copies_005_neg.ksh \ functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh \ functional/cli_root/zfs_create/cleanup.ksh \ functional/cli_root/zfs_create/setup.ksh \ functional/cli_root/zfs_create/zfs_create_001_pos.ksh \ functional/cli_root/zfs_create/zfs_create_002_pos.ksh \ functional/cli_root/zfs_create/zfs_create_003_pos.ksh \ functional/cli_root/zfs_create/zfs_create_004_pos.ksh \ functional/cli_root/zfs_create/zfs_create_005_pos.ksh \ functional/cli_root/zfs_create/zfs_create_006_pos.ksh \ functional/cli_root/zfs_create/zfs_create_007_pos.ksh \ functional/cli_root/zfs_create/zfs_create_008_neg.ksh \ functional/cli_root/zfs_create/zfs_create_009_neg.ksh \ functional/cli_root/zfs_create/zfs_create_010_neg.ksh \ functional/cli_root/zfs_create/zfs_create_011_pos.ksh \ functional/cli_root/zfs_create/zfs_create_012_pos.ksh \ functional/cli_root/zfs_create/zfs_create_013_pos.ksh \ functional/cli_root/zfs_create/zfs_create_014_pos.ksh \ functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh \ functional/cli_root/zfs_create/zfs_create_dryrun.ksh \ functional/cli_root/zfs_create/zfs_create_encrypted.ksh \ functional/cli_root/zfs_create/zfs_create_nomount.ksh \ functional/cli_root/zfs_create/zfs_create_verbose.ksh \ functional/cli_root/zfs_destroy/cleanup.ksh \ functional/cli_root/zfs_destroy/setup.ksh \ functional/cli_root/zfs_destroy/zfs_clone_livelist_condense_and_disable.ksh \ functional/cli_root/zfs_destroy/zfs_clone_livelist_condense_races.ksh \ functional/cli_root/zfs_destroy/zfs_clone_livelist_dedup.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_002_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_004_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_006_neg.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_007_neg.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_008_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_009_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_010_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_011_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_012_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_013_neg.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_015_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_clone_livelist.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_dev_removal_condense.ksh \ functional/cli_root/zfs_destroy/zfs_destroy_dev_removal.ksh \ functional/cli_root/zfs_diff/cleanup.ksh \ functional/cli_root/zfs_diff/setup.ksh \ functional/cli_root/zfs_diff/zfs_diff_changes.ksh \ functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh \ functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh \ functional/cli_root/zfs_diff/zfs_diff_mangle.ksh \ functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh \ functional/cli_root/zfs_diff/zfs_diff_types.ksh \ functional/cli_root/zfs_get/cleanup.ksh \ functional/cli_root/zfs_get/setup.ksh \ functional/cli_root/zfs_get/zfs_get_001_pos.ksh \ functional/cli_root/zfs_get/zfs_get_002_pos.ksh \ functional/cli_root/zfs_get/zfs_get_003_pos.ksh \ functional/cli_root/zfs_get/zfs_get_004_pos.ksh \ functional/cli_root/zfs_get/zfs_get_005_neg.ksh \ functional/cli_root/zfs_get/zfs_get_006_neg.ksh \ functional/cli_root/zfs_get/zfs_get_007_neg.ksh \ functional/cli_root/zfs_get/zfs_get_008_pos.ksh \ functional/cli_root/zfs_get/zfs_get_009_pos.ksh \ functional/cli_root/zfs_get/zfs_get_010_neg.ksh \ functional/cli_root/zfs_ids_to_path/cleanup.ksh \ functional/cli_root/zfs_ids_to_path/setup.ksh \ functional/cli_root/zfs_ids_to_path/zfs_ids_to_path_001_pos.ksh \ functional/cli_root/zfs_inherit/cleanup.ksh \ functional/cli_root/zfs_inherit/setup.ksh \ functional/cli_root/zfs_inherit/zfs_inherit_001_neg.ksh \ functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh \ functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh \ functional/cli_root/zfs_inherit/zfs_inherit_mountpoint.ksh \ functional/cli_root/zfs_jail/cleanup.ksh \ functional/cli_root/zfs_jail/setup.ksh \ functional/cli_root/zfs_jail/zfs_jail_001_pos.ksh \ functional/cli_root/zfs_load-key/cleanup.ksh \ functional/cli_root/zfs_load-key/setup.ksh \ functional/cli_root/zfs_load-key/zfs_load-key_all.ksh \ functional/cli_root/zfs_load-key/zfs_load-key_file.ksh \ functional/cli_root/zfs_load-key/zfs_load-key_https.ksh \ functional/cli_root/zfs_load-key/zfs_load-key.ksh \ functional/cli_root/zfs_load-key/zfs_load-key_location.ksh \ functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh \ functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh \ functional/cli_root/zfs_mount/cleanup.ksh \ functional/cli_root/zfs_mount/setup.ksh \ functional/cli_root/zfs_mount/zfs_mount_001_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_002_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_003_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_004_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_005_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_009_neg.ksh \ functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh \ functional/cli_root/zfs_mount/zfs_mount_011_neg.ksh \ functional/cli_root/zfs_mount/zfs_mount_012_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_013_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_014_neg.ksh \ functional/cli_root/zfs_mount/zfs_mount_all_001_pos.ksh \ functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh \ functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh \ functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh \ functional/cli_root/zfs_mount/zfs_mount_remount.ksh \ functional/cli_root/zfs_mount/zfs_mount_test_race.ksh \ functional/cli_root/zfs_mount/zfs_multi_mount.ksh \ functional/cli_root/zfs_program/cleanup.ksh \ functional/cli_root/zfs_program/setup.ksh \ functional/cli_root/zfs_program/zfs_program_json.ksh \ functional/cli_root/zfs_promote/cleanup.ksh \ functional/cli_root/zfs_promote/setup.ksh \ functional/cli_root/zfs_promote/zfs_promote_001_pos.ksh \ functional/cli_root/zfs_promote/zfs_promote_002_pos.ksh \ functional/cli_root/zfs_promote/zfs_promote_003_pos.ksh \ functional/cli_root/zfs_promote/zfs_promote_004_pos.ksh \ functional/cli_root/zfs_promote/zfs_promote_005_pos.ksh \ functional/cli_root/zfs_promote/zfs_promote_006_neg.ksh \ functional/cli_root/zfs_promote/zfs_promote_007_neg.ksh \ functional/cli_root/zfs_promote/zfs_promote_008_pos.ksh \ functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh \ functional/cli_root/zfs_property/cleanup.ksh \ functional/cli_root/zfs_property/setup.ksh \ functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh \ functional/cli_root/zfs_receive/cleanup.ksh \ functional/cli_root/zfs_receive/receive-o-x_props_aliases.ksh \ functional/cli_root/zfs_receive/receive-o-x_props_override.ksh \ functional/cli_root/zfs_receive/setup.ksh \ functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_002_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_003_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh \ functional/cli_root/zfs_receive/zfs_receive_005_neg.ksh \ functional/cli_root/zfs_receive/zfs_receive_006_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_007_neg.ksh \ functional/cli_root/zfs_receive/zfs_receive_008_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_009_neg.ksh \ functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_011_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_012_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_014_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_015_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_016_pos.ksh \ functional/cli_root/zfs_receive/zfs_receive_-e.ksh \ functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh \ functional/cli_root/zfs_receive/zfs_receive_from_zstd.ksh \ functional/cli_root/zfs_receive/zfs_receive_new_props.ksh \ functional/cli_root/zfs_receive/zfs_receive_raw_-d.ksh \ functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh \ functional/cli_root/zfs_receive/zfs_receive_raw.ksh \ functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh \ functional/cli_root/zfs_receive/zfs_receive_-wR-encrypted-mix.ksh \ functional/cli_root/zfs_receive/zfs_receive_corrective.ksh \ functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh \ functional/cli_root/zfs_rename/cleanup.ksh \ functional/cli_root/zfs_rename/setup.ksh \ functional/cli_root/zfs_rename/zfs_rename_001_pos.ksh \ functional/cli_root/zfs_rename/zfs_rename_002_pos.ksh \ functional/cli_root/zfs_rename/zfs_rename_003_pos.ksh \ functional/cli_root/zfs_rename/zfs_rename_004_neg.ksh \ functional/cli_root/zfs_rename/zfs_rename_005_neg.ksh \ functional/cli_root/zfs_rename/zfs_rename_006_pos.ksh \ functional/cli_root/zfs_rename/zfs_rename_007_pos.ksh \ functional/cli_root/zfs_rename/zfs_rename_008_pos.ksh \ functional/cli_root/zfs_rename/zfs_rename_009_neg.ksh \ functional/cli_root/zfs_rename/zfs_rename_010_neg.ksh \ functional/cli_root/zfs_rename/zfs_rename_011_pos.ksh \ functional/cli_root/zfs_rename/zfs_rename_012_neg.ksh \ functional/cli_root/zfs_rename/zfs_rename_013_pos.ksh \ functional/cli_root/zfs_rename/zfs_rename_014_neg.ksh \ functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh \ functional/cli_root/zfs_rename/zfs_rename_mountpoint.ksh \ functional/cli_root/zfs_rename/zfs_rename_nounmount.ksh \ functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh \ functional/cli_root/zfs_reservation/cleanup.ksh \ functional/cli_root/zfs_reservation/setup.ksh \ functional/cli_root/zfs_reservation/zfs_reservation_001_pos.ksh \ functional/cli_root/zfs_reservation/zfs_reservation_002_pos.ksh \ functional/cli_root/zfs_rollback/cleanup.ksh \ functional/cli_root/zfs_rollback/setup.ksh \ functional/cli_root/zfs_rollback/zfs_rollback_001_pos.ksh \ functional/cli_root/zfs_rollback/zfs_rollback_002_pos.ksh \ functional/cli_root/zfs_rollback/zfs_rollback_003_neg.ksh \ functional/cli_root/zfs_rollback/zfs_rollback_004_neg.ksh \ functional/cli_root/zfs_send/cleanup.ksh \ functional/cli_root/zfs_send/setup.ksh \ functional/cli_root/zfs_send/zfs_send_001_pos.ksh \ functional/cli_root/zfs_send/zfs_send_002_pos.ksh \ functional/cli_root/zfs_send/zfs_send_003_pos.ksh \ functional/cli_root/zfs_send/zfs_send_004_neg.ksh \ functional/cli_root/zfs_send/zfs_send_005_pos.ksh \ functional/cli_root/zfs_send/zfs_send_006_pos.ksh \ functional/cli_root/zfs_send/zfs_send_007_pos.ksh \ functional/cli_root/zfs_send/zfs_send-b.ksh \ functional/cli_root/zfs_send/zfs_send_encrypted.ksh \ functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh \ functional/cli_root/zfs_send/zfs_send_raw.ksh \ functional/cli_root/zfs_send/zfs_send_skip_missing.ksh \ functional/cli_root/zfs_send/zfs_send_sparse.ksh \ functional/cli_root/zfs_set/cache_001_pos.ksh \ functional/cli_root/zfs_set/cache_002_neg.ksh \ functional/cli_root/zfs_set/canmount_001_pos.ksh \ functional/cli_root/zfs_set/canmount_002_pos.ksh \ functional/cli_root/zfs_set/canmount_003_pos.ksh \ functional/cli_root/zfs_set/canmount_004_pos.ksh \ functional/cli_root/zfs_set/checksum_001_pos.ksh \ functional/cli_root/zfs_set/cleanup.ksh \ functional/cli_root/zfs_set/compression_001_pos.ksh \ functional/cli_root/zfs_set/mountpoint_001_pos.ksh \ functional/cli_root/zfs_set/mountpoint_002_pos.ksh \ functional/cli_root/zfs_set/mountpoint_003_pos.ksh \ functional/cli_root/zfs_set/onoffs_001_pos.ksh \ functional/cli_root/zfs_set/property_alias_001_pos.ksh \ functional/cli_root/zfs_set/readonly_001_pos.ksh \ functional/cli_root/zfs_set/reservation_001_neg.ksh \ functional/cli_root/zfs_set/ro_props_001_pos.ksh \ functional/cli_root/zfs_set/setup.ksh \ functional/cli_root/zfs_set/share_mount_001_neg.ksh \ functional/cli_root/zfs_set/snapdir_001_pos.ksh \ functional/cli_root/zfs/setup.ksh \ functional/cli_root/zfs_set/user_property_001_pos.ksh \ functional/cli_root/zfs_set/user_property_002_pos.ksh \ functional/cli_root/zfs_set/user_property_003_neg.ksh \ functional/cli_root/zfs_set/user_property_004_pos.ksh \ functional/cli_root/zfs_set/version_001_neg.ksh \ functional/cli_root/zfs_set/zfs_set_001_neg.ksh \ functional/cli_root/zfs_set/zfs_set_002_neg.ksh \ functional/cli_root/zfs_set/zfs_set_003_neg.ksh \ functional/cli_root/zfs_set/zfs_set_feature_activation.ksh \ functional/cli_root/zfs_set/zfs_set_keylocation.ksh \ functional/cli_root/zfs_share/cleanup.ksh \ functional/cli_root/zfs_share/setup.ksh \ functional/cli_root/zfs_share/zfs_share_001_pos.ksh \ functional/cli_root/zfs_share/zfs_share_002_pos.ksh \ functional/cli_root/zfs_share/zfs_share_003_pos.ksh \ functional/cli_root/zfs_share/zfs_share_004_pos.ksh \ functional/cli_root/zfs_share/zfs_share_005_pos.ksh \ functional/cli_root/zfs_share/zfs_share_006_pos.ksh \ functional/cli_root/zfs_share/zfs_share_007_neg.ksh \ functional/cli_root/zfs_share/zfs_share_008_neg.ksh \ functional/cli_root/zfs_share/zfs_share_009_neg.ksh \ functional/cli_root/zfs_share/zfs_share_010_neg.ksh \ functional/cli_root/zfs_share/zfs_share_011_pos.ksh \ functional/cli_root/zfs_share/zfs_share_012_pos.ksh \ functional/cli_root/zfs_share/zfs_share_013_pos.ksh \ functional/cli_root/zfs_share/zfs_share_concurrent_shares.ksh \ functional/cli_root/zfs_snapshot/cleanup.ksh \ functional/cli_root/zfs_snapshot/setup.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_001_neg.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_003_neg.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_004_neg.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_005_neg.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_006_pos.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_007_neg.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh \ functional/cli_root/zfs_sysfs/cleanup.ksh \ functional/cli_root/zfs_sysfs/setup.ksh \ functional/cli_root/zfs_sysfs/zfeature_set_unsupported.ksh \ functional/cli_root/zfs_sysfs/zfs_get_unsupported.ksh \ functional/cli_root/zfs_sysfs/zfs_set_unsupported.ksh \ functional/cli_root/zfs_sysfs/zfs_sysfs_live.ksh \ functional/cli_root/zfs_sysfs/zpool_get_unsupported.ksh \ functional/cli_root/zfs_sysfs/zpool_set_unsupported.ksh \ functional/cli_root/zfs_unload-key/cleanup.ksh \ functional/cli_root/zfs_unload-key/setup.ksh \ functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh \ functional/cli_root/zfs_unload-key/zfs_unload-key.ksh \ functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh \ functional/cli_root/zfs_unmount/cleanup.ksh \ functional/cli_root/zfs_unmount/setup.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_001_pos.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_002_pos.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_003_pos.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_004_pos.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_005_pos.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_006_pos.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_007_neg.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_all_001_pos.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_nested.ksh \ functional/cli_root/zfs_unmount/zfs_unmount_unload_keys.ksh \ functional/cli_root/zfs_unshare/cleanup.ksh \ functional/cli_root/zfs_unshare/setup.ksh \ functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh \ functional/cli_root/zfs_unshare/zfs_unshare_002_pos.ksh \ functional/cli_root/zfs_unshare/zfs_unshare_003_pos.ksh \ functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh \ functional/cli_root/zfs_unshare/zfs_unshare_005_neg.ksh \ functional/cli_root/zfs_unshare/zfs_unshare_006_pos.ksh \ functional/cli_root/zfs_unshare/zfs_unshare_007_pos.ksh \ functional/cli_root/zfs_unshare/zfs_unshare_008_pos.ksh \ functional/cli_root/zfs_upgrade/cleanup.ksh \ functional/cli_root/zfs_upgrade/setup.ksh \ functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh \ functional/cli_root/zfs_upgrade/zfs_upgrade_002_pos.ksh \ functional/cli_root/zfs_upgrade/zfs_upgrade_003_pos.ksh \ functional/cli_root/zfs_upgrade/zfs_upgrade_004_pos.ksh \ functional/cli_root/zfs_upgrade/zfs_upgrade_005_pos.ksh \ functional/cli_root/zfs_upgrade/zfs_upgrade_006_neg.ksh \ functional/cli_root/zfs_upgrade/zfs_upgrade_007_neg.ksh \ functional/cli_root/zfs_wait/cleanup.ksh \ functional/cli_root/zfs_wait/setup.ksh \ functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh \ functional/cli_root/zfs_wait/zfs_wait_getsubopt.ksh \ functional/cli_root/zfs/zfs_001_neg.ksh \ functional/cli_root/zfs/zfs_002_pos.ksh \ functional/cli_root/zfs/zfs_003_neg.ksh \ functional/cli_root/zhack/zhack_label_checksum.ksh \ functional/cli_root/zpool_add/add_nested_replacing_spare.ksh \ functional/cli_root/zpool_add/add-o_ashift.ksh \ functional/cli_root/zpool_add/add_prop_ashift.ksh \ functional/cli_root/zpool_add/cleanup.ksh \ functional/cli_root/zpool_add/setup.ksh \ functional/cli_root/zpool_add/zpool_add_001_pos.ksh \ functional/cli_root/zpool_add/zpool_add_002_pos.ksh \ functional/cli_root/zpool_add/zpool_add_003_pos.ksh \ functional/cli_root/zpool_add/zpool_add_004_pos.ksh \ functional/cli_root/zpool_add/zpool_add_005_pos.ksh \ functional/cli_root/zpool_add/zpool_add_006_pos.ksh \ functional/cli_root/zpool_add/zpool_add_007_neg.ksh \ functional/cli_root/zpool_add/zpool_add_008_neg.ksh \ functional/cli_root/zpool_add/zpool_add_009_neg.ksh \ functional/cli_root/zpool_add/zpool_add_010_pos.ksh \ functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh \ functional/cli_root/zpool_attach/attach-o_ashift.ksh \ functional/cli_root/zpool_attach/cleanup.ksh \ functional/cli_root/zpool_attach/setup.ksh \ functional/cli_root/zpool_attach/zpool_attach_001_neg.ksh \ functional/cli_root/zpool/cleanup.ksh \ functional/cli_root/zpool_clear/cleanup.ksh \ functional/cli_root/zpool_clear/setup.ksh \ functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh \ functional/cli_root/zpool_clear/zpool_clear_002_neg.ksh \ functional/cli_root/zpool_clear/zpool_clear_003_neg.ksh \ functional/cli_root/zpool_clear/zpool_clear_readonly.ksh \ functional/cli_root/zpool_create/cleanup.ksh \ functional/cli_root/zpool_create/create-o_ashift.ksh \ functional/cli_root/zpool_create/setup.ksh \ functional/cli_root/zpool_create/zpool_create_001_pos.ksh \ functional/cli_root/zpool_create/zpool_create_002_pos.ksh \ functional/cli_root/zpool_create/zpool_create_003_pos.ksh \ functional/cli_root/zpool_create/zpool_create_004_pos.ksh \ functional/cli_root/zpool_create/zpool_create_005_pos.ksh \ functional/cli_root/zpool_create/zpool_create_006_pos.ksh \ functional/cli_root/zpool_create/zpool_create_007_neg.ksh \ functional/cli_root/zpool_create/zpool_create_008_pos.ksh \ functional/cli_root/zpool_create/zpool_create_009_neg.ksh \ functional/cli_root/zpool_create/zpool_create_010_neg.ksh \ functional/cli_root/zpool_create/zpool_create_011_neg.ksh \ functional/cli_root/zpool_create/zpool_create_012_neg.ksh \ functional/cli_root/zpool_create/zpool_create_014_neg.ksh \ functional/cli_root/zpool_create/zpool_create_015_neg.ksh \ functional/cli_root/zpool_create/zpool_create_016_pos.ksh \ functional/cli_root/zpool_create/zpool_create_017_neg.ksh \ functional/cli_root/zpool_create/zpool_create_018_pos.ksh \ functional/cli_root/zpool_create/zpool_create_019_pos.ksh \ functional/cli_root/zpool_create/zpool_create_020_pos.ksh \ functional/cli_root/zpool_create/zpool_create_021_pos.ksh \ functional/cli_root/zpool_create/zpool_create_022_pos.ksh \ functional/cli_root/zpool_create/zpool_create_023_neg.ksh \ functional/cli_root/zpool_create/zpool_create_024_pos.ksh \ functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh \ functional/cli_root/zpool_create/zpool_create_draid_001_pos.ksh \ functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh \ functional/cli_root/zpool_create/zpool_create_draid_003_pos.ksh \ functional/cli_root/zpool_create/zpool_create_draid_004_pos.ksh \ functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh \ functional/cli_root/zpool_create/zpool_create_encrypted.ksh \ functional/cli_root/zpool_create/zpool_create_features_001_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_002_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_003_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_004_neg.ksh \ functional/cli_root/zpool_create/zpool_create_features_005_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_006_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_008_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_009_pos.ksh \ functional/cli_root/zpool_create/zpool_create_tempname.ksh \ functional/cli_root/zpool_destroy/zpool_destroy_001_pos.ksh \ functional/cli_root/zpool_destroy/zpool_destroy_002_pos.ksh \ functional/cli_root/zpool_destroy/zpool_destroy_003_neg.ksh \ functional/cli_root/zpool_detach/cleanup.ksh \ functional/cli_root/zpool_detach/setup.ksh \ functional/cli_root/zpool_detach/zpool_detach_001_neg.ksh \ functional/cli_root/zpool_events/cleanup.ksh \ functional/cli_root/zpool_events/setup.ksh \ functional/cli_root/zpool_events/zpool_events_clear.ksh \ functional/cli_root/zpool_events/zpool_events_clear_retained.ksh \ functional/cli_root/zpool_events/zpool_events_cliargs.ksh \ functional/cli_root/zpool_events/zpool_events_duplicates.ksh \ functional/cli_root/zpool_events/zpool_events_errors.ksh \ functional/cli_root/zpool_events/zpool_events_follow.ksh \ functional/cli_root/zpool_events/zpool_events_poolname.ksh \ functional/cli_root/zpool_expand/cleanup.ksh \ functional/cli_root/zpool_expand/setup.ksh \ functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh \ functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh \ functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh \ functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh \ functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh \ functional/cli_root/zpool_export/cleanup.ksh \ functional/cli_root/zpool_export/setup.ksh \ functional/cli_root/zpool_export/zpool_export_001_pos.ksh \ functional/cli_root/zpool_export/zpool_export_002_pos.ksh \ functional/cli_root/zpool_export/zpool_export_003_neg.ksh \ functional/cli_root/zpool_export/zpool_export_004_pos.ksh \ functional/cli_root/zpool_get/cleanup.ksh \ functional/cli_root/zpool_get/setup.ksh \ functional/cli_root/zpool_get/zpool_get_001_pos.ksh \ functional/cli_root/zpool_get/zpool_get_002_pos.ksh \ functional/cli_root/zpool_get/zpool_get_003_pos.ksh \ functional/cli_root/zpool_get/zpool_get_004_neg.ksh \ functional/cli_root/zpool_get/zpool_get_005_pos.ksh \ functional/cli_root/zpool_history/cleanup.ksh \ functional/cli_root/zpool_history/setup.ksh \ functional/cli_root/zpool_history/zpool_history_001_neg.ksh \ functional/cli_root/zpool_history/zpool_history_002_pos.ksh \ functional/cli_root/zpool_import/cleanup.ksh \ functional/cli_root/zpool_import/import_cachefile_device_added.ksh \ functional/cli_root/zpool_import/import_cachefile_device_removed.ksh \ functional/cli_root/zpool_import/import_cachefile_device_replaced.ksh \ functional/cli_root/zpool_import/import_cachefile_mirror_attached.ksh \ functional/cli_root/zpool_import/import_cachefile_mirror_detached.ksh \ functional/cli_root/zpool_import/import_cachefile_paths_changed.ksh \ functional/cli_root/zpool_import/import_cachefile_shared_device.ksh \ functional/cli_root/zpool_import/import_devices_missing.ksh \ functional/cli_root/zpool_import/import_paths_changed.ksh \ functional/cli_root/zpool_import/import_rewind_config_changed.ksh \ functional/cli_root/zpool_import/import_rewind_device_replaced.ksh \ functional/cli_root/zpool_import/setup.ksh \ functional/cli_root/zpool_import/zpool_import_001_pos.ksh \ functional/cli_root/zpool_import/zpool_import_002_pos.ksh \ functional/cli_root/zpool_import/zpool_import_003_pos.ksh \ functional/cli_root/zpool_import/zpool_import_004_pos.ksh \ functional/cli_root/zpool_import/zpool_import_005_pos.ksh \ functional/cli_root/zpool_import/zpool_import_006_pos.ksh \ functional/cli_root/zpool_import/zpool_import_007_pos.ksh \ functional/cli_root/zpool_import/zpool_import_008_pos.ksh \ functional/cli_root/zpool_import/zpool_import_009_neg.ksh \ functional/cli_root/zpool_import/zpool_import_010_pos.ksh \ functional/cli_root/zpool_import/zpool_import_011_neg.ksh \ functional/cli_root/zpool_import/zpool_import_012_pos.ksh \ functional/cli_root/zpool_import/zpool_import_013_neg.ksh \ functional/cli_root/zpool_import/zpool_import_014_pos.ksh \ functional/cli_root/zpool_import/zpool_import_015_pos.ksh \ functional/cli_root/zpool_import/zpool_import_016_pos.ksh \ functional/cli_root/zpool_import/zpool_import_017_pos.ksh \ functional/cli_root/zpool_import/zpool_import_all_001_pos.ksh \ functional/cli_root/zpool_import/zpool_import_encrypted.ksh \ functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh \ functional/cli_root/zpool_import/zpool_import_errata3.ksh \ functional/cli_root/zpool_import/zpool_import_errata4.ksh \ functional/cli_root/zpool_import/zpool_import_features_001_pos.ksh \ functional/cli_root/zpool_import/zpool_import_features_002_neg.ksh \ functional/cli_root/zpool_import/zpool_import_features_003_pos.ksh \ functional/cli_root/zpool_import/zpool_import_missing_001_pos.ksh \ functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh \ functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh \ functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh \ functional/cli_root/zpool_initialize/cleanup.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_split.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_suspend_resume.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_unsupported_vdevs.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh \ functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh \ functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh \ functional/cli_root/zpool_labelclear/zpool_labelclear_removed.ksh \ functional/cli_root/zpool_labelclear/zpool_labelclear_valid.ksh \ functional/cli_root/zpool_offline/cleanup.ksh \ functional/cli_root/zpool_offline/setup.ksh \ functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh \ functional/cli_root/zpool_offline/zpool_offline_002_neg.ksh \ functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh \ functional/cli_root/zpool_online/cleanup.ksh \ functional/cli_root/zpool_online/setup.ksh \ functional/cli_root/zpool_online/zpool_online_001_pos.ksh \ functional/cli_root/zpool_online/zpool_online_002_neg.ksh \ functional/cli_root/zpool_remove/cleanup.ksh \ functional/cli_root/zpool_remove/setup.ksh \ functional/cli_root/zpool_remove/zpool_remove_001_neg.ksh \ functional/cli_root/zpool_remove/zpool_remove_002_pos.ksh \ functional/cli_root/zpool_remove/zpool_remove_003_pos.ksh \ functional/cli_root/zpool_reopen/cleanup.ksh \ functional/cli_root/zpool_reopen/setup.ksh \ functional/cli_root/zpool_reopen/zpool_reopen_001_pos.ksh \ functional/cli_root/zpool_reopen/zpool_reopen_002_pos.ksh \ functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh \ functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh \ functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh \ functional/cli_root/zpool_reopen/zpool_reopen_006_neg.ksh \ functional/cli_root/zpool_reopen/zpool_reopen_007_pos.ksh \ functional/cli_root/zpool_replace/cleanup.ksh \ functional/cli_root/zpool_replace/replace-o_ashift.ksh \ functional/cli_root/zpool_replace/replace_prop_ashift.ksh \ functional/cli_root/zpool_replace/setup.ksh \ functional/cli_root/zpool_replace/zpool_replace_001_neg.ksh \ functional/cli_root/zpool_resilver/cleanup.ksh \ functional/cli_root/zpool_resilver/setup.ksh \ functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh \ functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh \ functional/cli_root/zpool_scrub/cleanup.ksh \ functional/cli_root/zpool_scrub/setup.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_001_neg.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_002_pos.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_003_pos.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_004_pos.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_005_pos.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh \ functional/cli_root/zpool_set/cleanup.ksh \ functional/cli_root/zpool_set/setup.ksh \ functional/cli_root/zpool/setup.ksh \ functional/cli_root/zpool_set/zpool_set_001_pos.ksh \ functional/cli_root/zpool_set/zpool_set_002_neg.ksh \ functional/cli_root/zpool_set/zpool_set_003_neg.ksh \ functional/cli_root/zpool_set/zpool_set_ashift.ksh \ functional/cli_root/zpool_set/zpool_set_features.ksh \ functional/cli_root/zpool_split/cleanup.ksh \ functional/cli_root/zpool_split/setup.ksh \ functional/cli_root/zpool_split/zpool_split_cliargs.ksh \ functional/cli_root/zpool_split/zpool_split_devices.ksh \ functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh \ functional/cli_root/zpool_split/zpool_split_encryption.ksh \ functional/cli_root/zpool_split/zpool_split_indirect.ksh \ functional/cli_root/zpool_split/zpool_split_props.ksh \ functional/cli_root/zpool_split/zpool_split_resilver.ksh \ functional/cli_root/zpool_split/zpool_split_vdevs.ksh \ functional/cli_root/zpool_split/zpool_split_wholedisk.ksh \ functional/cli_root/zpool_status/cleanup.ksh \ functional/cli_root/zpool_status/setup.ksh \ functional/cli_root/zpool_status/zpool_status_001_pos.ksh \ functional/cli_root/zpool_status/zpool_status_002_pos.ksh \ functional/cli_root/zpool_status/zpool_status_003_pos.ksh \ functional/cli_root/zpool_status/zpool_status_004_pos.ksh \ + functional/cli_root/zpool_status/zpool_status_005_pos.ksh \ functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \ functional/cli_root/zpool_sync/cleanup.ksh \ functional/cli_root/zpool_sync/setup.ksh \ functional/cli_root/zpool_sync/zpool_sync_001_pos.ksh \ functional/cli_root/zpool_sync/zpool_sync_002_neg.ksh \ functional/cli_root/zpool_trim/cleanup.ksh \ functional/cli_root/zpool_trim/setup.ksh \ functional/cli_root/zpool_trim/zpool_trim_attach_detach_add_remove.ksh \ functional/cli_root/zpool_trim/zpool_trim_fault_export_import_online.ksh \ functional/cli_root/zpool_trim/zpool_trim_import_export.ksh \ functional/cli_root/zpool_trim/zpool_trim_multiple.ksh \ functional/cli_root/zpool_trim/zpool_trim_neg.ksh \ functional/cli_root/zpool_trim/zpool_trim_offline_export_import_online.ksh \ functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh \ functional/cli_root/zpool_trim/zpool_trim_partial.ksh \ functional/cli_root/zpool_trim/zpool_trim_rate.ksh \ functional/cli_root/zpool_trim/zpool_trim_rate_neg.ksh \ functional/cli_root/zpool_trim/zpool_trim_secure.ksh \ functional/cli_root/zpool_trim/zpool_trim_split.ksh \ functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh \ functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh \ functional/cli_root/zpool_trim/zpool_trim_suspend_resume.ksh \ functional/cli_root/zpool_trim/zpool_trim_unsupported_vdevs.ksh \ functional/cli_root/zpool_trim/zpool_trim_verify_checksums.ksh \ functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh \ functional/cli_root/zpool_upgrade/cleanup.ksh \ functional/cli_root/zpool_upgrade/setup.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_001_pos.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_002_pos.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_003_pos.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_004_pos.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_005_neg.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_006_neg.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_008_pos.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_009_neg.ksh \ functional/cli_root/zpool_upgrade/zpool_upgrade_features_001_pos.ksh \ functional/cli_root/zpool_wait/cleanup.ksh \ functional/cli_root/zpool_wait/scan/cleanup.ksh \ functional/cli_root/zpool_wait/scan/setup.ksh \ functional/cli_root/zpool_wait/scan/zpool_wait_rebuild.ksh \ functional/cli_root/zpool_wait/scan/zpool_wait_replace_cancel.ksh \ functional/cli_root/zpool_wait/scan/zpool_wait_replace.ksh \ functional/cli_root/zpool_wait/scan/zpool_wait_resilver.ksh \ functional/cli_root/zpool_wait/scan/zpool_wait_scrub_basic.ksh \ functional/cli_root/zpool_wait/scan/zpool_wait_scrub_cancel.ksh \ functional/cli_root/zpool_wait/scan/zpool_wait_scrub_flag.ksh \ functional/cli_root/zpool_wait/setup.ksh \ functional/cli_root/zpool_wait/zpool_wait_discard.ksh \ functional/cli_root/zpool_wait/zpool_wait_freeing.ksh \ functional/cli_root/zpool_wait/zpool_wait_initialize_basic.ksh \ functional/cli_root/zpool_wait/zpool_wait_initialize_cancel.ksh \ functional/cli_root/zpool_wait/zpool_wait_initialize_flag.ksh \ functional/cli_root/zpool_wait/zpool_wait_multiple.ksh \ functional/cli_root/zpool_wait/zpool_wait_no_activity.ksh \ functional/cli_root/zpool_wait/zpool_wait_remove_cancel.ksh \ functional/cli_root/zpool_wait/zpool_wait_remove.ksh \ functional/cli_root/zpool_wait/zpool_wait_trim_basic.ksh \ functional/cli_root/zpool_wait/zpool_wait_trim_cancel.ksh \ functional/cli_root/zpool_wait/zpool_wait_trim_flag.ksh \ functional/cli_root/zpool_wait/zpool_wait_usage.ksh \ functional/cli_root/zpool/zpool_001_neg.ksh \ functional/cli_root/zpool/zpool_002_pos.ksh \ functional/cli_root/zpool/zpool_003_pos.ksh \ functional/cli_root/zpool/zpool_colors.ksh \ functional/cli_user/misc/arcstat_001_pos.ksh \ functional/cli_user/misc/arc_summary_001_pos.ksh \ functional/cli_user/misc/arc_summary_002_neg.ksh \ functional/cli_user/misc/cleanup.ksh \ functional/cli_user/misc/setup.ksh \ functional/cli_user/misc/zdb_001_neg.ksh \ functional/cli_user/misc/zfs_001_neg.ksh \ functional/cli_user/misc/zfs_allow_001_neg.ksh \ functional/cli_user/misc/zfs_clone_001_neg.ksh \ functional/cli_user/misc/zfs_create_001_neg.ksh \ functional/cli_user/misc/zfs_destroy_001_neg.ksh \ functional/cli_user/misc/zfs_get_001_neg.ksh \ functional/cli_user/misc/zfs_inherit_001_neg.ksh \ functional/cli_user/misc/zfs_mount_001_neg.ksh \ functional/cli_user/misc/zfs_promote_001_neg.ksh \ functional/cli_user/misc/zfs_receive_001_neg.ksh \ functional/cli_user/misc/zfs_rename_001_neg.ksh \ functional/cli_user/misc/zfs_rollback_001_neg.ksh \ functional/cli_user/misc/zfs_send_001_neg.ksh \ functional/cli_user/misc/zfs_set_001_neg.ksh \ functional/cli_user/misc/zfs_share_001_neg.ksh \ functional/cli_user/misc/zfs_snapshot_001_neg.ksh \ functional/cli_user/misc/zfs_unallow_001_neg.ksh \ functional/cli_user/misc/zfs_unmount_001_neg.ksh \ functional/cli_user/misc/zfs_unshare_001_neg.ksh \ functional/cli_user/misc/zfs_upgrade_001_neg.ksh \ functional/cli_user/misc/zpool_001_neg.ksh \ functional/cli_user/misc/zpool_add_001_neg.ksh \ functional/cli_user/misc/zpool_attach_001_neg.ksh \ functional/cli_user/misc/zpool_clear_001_neg.ksh \ functional/cli_user/misc/zpool_create_001_neg.ksh \ functional/cli_user/misc/zpool_destroy_001_neg.ksh \ functional/cli_user/misc/zpool_detach_001_neg.ksh \ functional/cli_user/misc/zpool_export_001_neg.ksh \ functional/cli_user/misc/zpool_get_001_neg.ksh \ functional/cli_user/misc/zpool_history_001_neg.ksh \ functional/cli_user/misc/zpool_import_001_neg.ksh \ functional/cli_user/misc/zpool_import_002_neg.ksh \ functional/cli_user/misc/zpool_offline_001_neg.ksh \ functional/cli_user/misc/zpool_online_001_neg.ksh \ functional/cli_user/misc/zpool_remove_001_neg.ksh \ functional/cli_user/misc/zpool_replace_001_neg.ksh \ functional/cli_user/misc/zpool_scrub_001_neg.ksh \ functional/cli_user/misc/zpool_set_001_neg.ksh \ functional/cli_user/misc/zpool_status_001_neg.ksh \ functional/cli_user/misc/zpool_upgrade_001_neg.ksh \ functional/cli_user/misc/zpool_wait_privilege.ksh \ functional/cli_user/zfs_list/cleanup.ksh \ functional/cli_user/zfs_list/setup.ksh \ functional/cli_user/zfs_list/zfs_list_001_pos.ksh \ functional/cli_user/zfs_list/zfs_list_002_pos.ksh \ functional/cli_user/zfs_list/zfs_list_003_pos.ksh \ functional/cli_user/zfs_list/zfs_list_004_neg.ksh \ functional/cli_user/zfs_list/zfs_list_005_neg.ksh \ functional/cli_user/zfs_list/zfs_list_007_pos.ksh \ functional/cli_user/zfs_list/zfs_list_008_neg.ksh \ functional/cli_user/zpool_iostat/cleanup.ksh \ functional/cli_user/zpool_iostat/setup.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_001_neg.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_003_neg.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_-c_disable.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_-c_homedir.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_-c_searchpath.ksh \ functional/cli_user/zpool_list/cleanup.ksh \ functional/cli_user/zpool_list/setup.ksh \ functional/cli_user/zpool_list/zpool_list_001_pos.ksh \ functional/cli_user/zpool_list/zpool_list_002_neg.ksh \ functional/cli_user/zpool_status/cleanup.ksh \ functional/cli_user/zpool_status/setup.ksh \ functional/cli_user/zpool_status/zpool_status_003_pos.ksh \ functional/cli_user/zpool_status/zpool_status_-c_disable.ksh \ functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh \ functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh \ functional/compression/cleanup.ksh \ functional/compression/compress_001_pos.ksh \ functional/compression/compress_002_pos.ksh \ functional/compression/compress_003_pos.ksh \ functional/compression/compress_004_pos.ksh \ functional/compression/compress_zstd_bswap.ksh \ functional/compression/l2arc_compressed_arc_disabled.ksh \ functional/compression/l2arc_compressed_arc.ksh \ functional/compression/l2arc_encrypted.ksh \ functional/compression/l2arc_encrypted_no_compressed_arc.ksh \ functional/compression/setup.ksh \ functional/cp_files/cleanup.ksh \ functional/cp_files/cp_files_001_pos.ksh \ functional/cp_files/setup.ksh \ functional/crtime/cleanup.ksh \ functional/crtime/crtime_001_pos.ksh \ functional/crtime/setup.ksh \ functional/ctime/cleanup.ksh \ functional/ctime/ctime_001_pos.ksh \ functional/ctime/setup.ksh \ functional/deadman/deadman_ratelimit.ksh \ functional/deadman/deadman_sync.ksh \ functional/deadman/deadman_zio.ksh \ functional/delegate/cleanup.ksh \ functional/delegate/setup.ksh \ functional/delegate/zfs_allow_001_pos.ksh \ functional/delegate/zfs_allow_002_pos.ksh \ functional/delegate/zfs_allow_003_pos.ksh \ functional/delegate/zfs_allow_004_pos.ksh \ functional/delegate/zfs_allow_005_pos.ksh \ functional/delegate/zfs_allow_006_pos.ksh \ functional/delegate/zfs_allow_007_pos.ksh \ functional/delegate/zfs_allow_008_pos.ksh \ functional/delegate/zfs_allow_009_neg.ksh \ functional/delegate/zfs_allow_010_pos.ksh \ functional/delegate/zfs_allow_011_neg.ksh \ functional/delegate/zfs_allow_012_neg.ksh \ functional/delegate/zfs_unallow_001_pos.ksh \ functional/delegate/zfs_unallow_002_pos.ksh \ functional/delegate/zfs_unallow_003_pos.ksh \ functional/delegate/zfs_unallow_004_pos.ksh \ functional/delegate/zfs_unallow_005_pos.ksh \ functional/delegate/zfs_unallow_006_pos.ksh \ functional/delegate/zfs_unallow_007_neg.ksh \ functional/delegate/zfs_unallow_008_neg.ksh \ functional/devices/cleanup.ksh \ functional/devices/devices_001_pos.ksh \ functional/devices/devices_002_neg.ksh \ functional/devices/devices_003_pos.ksh \ functional/devices/setup.ksh \ functional/dos_attributes/cleanup.ksh \ functional/dos_attributes/read_dos_attrs_001.ksh \ functional/dos_attributes/setup.ksh \ functional/dos_attributes/write_dos_attrs_001.ksh \ functional/events/cleanup.ksh \ functional/events/events_001_pos.ksh \ functional/events/events_002_pos.ksh \ functional/events/setup.ksh \ functional/events/zed_fd_spill.ksh \ functional/events/zed_rc_filter.ksh \ functional/exec/cleanup.ksh \ functional/exec/exec_001_pos.ksh \ functional/exec/exec_002_neg.ksh \ functional/exec/setup.ksh \ functional/fallocate/cleanup.ksh \ functional/fallocate/fallocate_prealloc.ksh \ functional/fallocate/fallocate_punch-hole.ksh \ functional/fallocate/fallocate_zero-range.ksh \ functional/fallocate/setup.ksh \ functional/fault/auto_offline_001_pos.ksh \ functional/fault/auto_online_001_pos.ksh \ functional/fault/auto_online_002_pos.ksh \ functional/fault/auto_replace_001_pos.ksh \ functional/fault/auto_spare_001_pos.ksh \ functional/fault/auto_spare_002_pos.ksh \ functional/fault/auto_spare_ashift.ksh \ functional/fault/auto_spare_multiple.ksh \ functional/fault/auto_spare_shared.ksh \ functional/fault/cleanup.ksh \ functional/fault/decompress_fault.ksh \ functional/fault/decrypt_fault.ksh \ functional/fault/scrub_after_resilver.ksh \ functional/fault/setup.ksh \ functional/fault/zpool_status_-s.ksh \ functional/features/async_destroy/async_destroy_001_pos.ksh \ functional/features/async_destroy/cleanup.ksh \ functional/features/async_destroy/setup.ksh \ functional/features/large_dnode/cleanup.ksh \ functional/features/large_dnode/large_dnode_001_pos.ksh \ functional/features/large_dnode/large_dnode_002_pos.ksh \ functional/features/large_dnode/large_dnode_003_pos.ksh \ functional/features/large_dnode/large_dnode_004_neg.ksh \ functional/features/large_dnode/large_dnode_005_pos.ksh \ functional/features/large_dnode/large_dnode_006_pos.ksh \ functional/features/large_dnode/large_dnode_007_neg.ksh \ functional/features/large_dnode/large_dnode_008_pos.ksh \ functional/features/large_dnode/large_dnode_009_pos.ksh \ functional/features/large_dnode/setup.ksh \ functional/grow/grow_pool_001_pos.ksh \ functional/grow/grow_replicas_001_pos.ksh \ functional/history/cleanup.ksh \ functional/history/history_001_pos.ksh \ functional/history/history_002_pos.ksh \ functional/history/history_003_pos.ksh \ functional/history/history_004_pos.ksh \ functional/history/history_005_neg.ksh \ functional/history/history_006_neg.ksh \ functional/history/history_007_pos.ksh \ functional/history/history_008_pos.ksh \ functional/history/history_009_pos.ksh \ functional/history/history_010_pos.ksh \ functional/history/setup.ksh \ functional/inheritance/cleanup.ksh \ functional/inheritance/inherit_001_pos.ksh \ functional/inuse/inuse_001_pos.ksh \ functional/inuse/inuse_003_pos.ksh \ functional/inuse/inuse_004_pos.ksh \ functional/inuse/inuse_005_pos.ksh \ functional/inuse/inuse_006_pos.ksh \ functional/inuse/inuse_007_pos.ksh \ functional/inuse/inuse_008_pos.ksh \ functional/inuse/inuse_009_pos.ksh \ functional/inuse/setup.ksh \ functional/io/cleanup.ksh \ functional/io/io_uring.ksh \ functional/io/libaio.ksh \ functional/io/mmap.ksh \ functional/io/posixaio.ksh \ functional/io/psync.ksh \ functional/io/setup.ksh \ functional/io/sync.ksh \ functional/l2arc/cleanup.ksh \ functional/l2arc/l2arc_arcstats_pos.ksh \ functional/l2arc/l2arc_l2miss_pos.ksh \ functional/l2arc/l2arc_mfuonly_pos.ksh \ functional/l2arc/persist_l2arc_001_pos.ksh \ functional/l2arc/persist_l2arc_002_pos.ksh \ functional/l2arc/persist_l2arc_003_neg.ksh \ functional/l2arc/persist_l2arc_004_pos.ksh \ functional/l2arc/persist_l2arc_005_pos.ksh \ functional/l2arc/setup.ksh \ functional/large_files/cleanup.ksh \ functional/large_files/large_files_001_pos.ksh \ functional/large_files/large_files_002_pos.ksh \ functional/large_files/setup.ksh \ functional/largest_pool/largest_pool_001_pos.ksh \ functional/libzfs/cleanup.ksh \ functional/libzfs/libzfs_input.ksh \ functional/libzfs/setup.ksh \ functional/limits/cleanup.ksh \ functional/limits/filesystem_count.ksh \ functional/limits/filesystem_limit.ksh \ functional/limits/setup.ksh \ functional/limits/snapshot_count.ksh \ functional/limits/snapshot_limit.ksh \ functional/link_count/cleanup.ksh \ functional/link_count/link_count_001.ksh \ functional/link_count/link_count_root_inode.ksh \ functional/link_count/setup.ksh \ functional/log_spacemap/log_spacemap_import_logs.ksh \ functional/migration/cleanup.ksh \ functional/migration/migration_001_pos.ksh \ functional/migration/migration_002_pos.ksh \ functional/migration/migration_003_pos.ksh \ functional/migration/migration_004_pos.ksh \ functional/migration/migration_005_pos.ksh \ functional/migration/migration_006_pos.ksh \ functional/migration/migration_007_pos.ksh \ functional/migration/migration_008_pos.ksh \ functional/migration/migration_009_pos.ksh \ functional/migration/migration_010_pos.ksh \ functional/migration/migration_011_pos.ksh \ functional/migration/migration_012_pos.ksh \ functional/migration/setup.ksh \ functional/mmap/cleanup.ksh \ functional/mmap/mmap_libaio_001_pos.ksh \ functional/mmap/mmap_read_001_pos.ksh \ functional/mmap/mmap_seek_001_pos.ksh \ functional/mmap/mmap_sync_001_pos.ksh \ functional/mmap/mmap_write_001_pos.ksh \ functional/mmap/setup.ksh \ functional/mmp/cleanup.ksh \ functional/mmp/mmp_active_import.ksh \ functional/mmp/mmp_exported_import.ksh \ functional/mmp/mmp_hostid.ksh \ functional/mmp/mmp_inactive_import.ksh \ functional/mmp/mmp_interval.ksh \ functional/mmp/mmp_on_off.ksh \ functional/mmp/mmp_on_thread.ksh \ functional/mmp/mmp_on_uberblocks.ksh \ functional/mmp/mmp_on_zdb.ksh \ functional/mmp/mmp_reset_interval.ksh \ functional/mmp/mmp_write_distribution.ksh \ functional/mmp/mmp_write_uberblocks.ksh \ functional/mmp/multihost_history.ksh \ functional/mmp/setup.ksh \ functional/mount/cleanup.ksh \ functional/mount/setup.ksh \ functional/mount/umount_001.ksh \ functional/mount/umountall_001.ksh \ functional/mount/umount_unlinked_drain.ksh \ functional/mv_files/cleanup.ksh \ functional/mv_files/mv_files_001_pos.ksh \ functional/mv_files/mv_files_002_pos.ksh \ functional/mv_files/random_creation.ksh \ functional/mv_files/setup.ksh \ functional/nestedfs/cleanup.ksh \ functional/nestedfs/nestedfs_001_pos.ksh \ functional/nestedfs/setup.ksh \ functional/nopwrite/cleanup.ksh \ functional/nopwrite/nopwrite_copies.ksh \ functional/nopwrite/nopwrite_mtime.ksh \ functional/nopwrite/nopwrite_negative.ksh \ functional/nopwrite/nopwrite_promoted_clone.ksh \ functional/nopwrite/nopwrite_recsize.ksh \ functional/nopwrite/nopwrite_sync.ksh \ functional/nopwrite/nopwrite_varying_compression.ksh \ functional/nopwrite/nopwrite_volume.ksh \ functional/nopwrite/setup.ksh \ functional/no_space/cleanup.ksh \ functional/no_space/enospc_001_pos.ksh \ functional/no_space/enospc_002_pos.ksh \ functional/no_space/enospc_003_pos.ksh \ functional/no_space/enospc_df.ksh \ functional/no_space/enospc_rm.ksh \ functional/no_space/setup.ksh \ functional/online_offline/cleanup.ksh \ functional/online_offline/online_offline_001_pos.ksh \ functional/online_offline/online_offline_002_neg.ksh \ functional/online_offline/online_offline_003_neg.ksh \ functional/online_offline/setup.ksh \ functional/pam/cleanup.ksh \ functional/pam/pam_basic.ksh \ functional/pam/pam_nounmount.ksh \ functional/pam/pam_short_password.ksh \ functional/pam/setup.ksh \ functional/pool_checkpoint/checkpoint_after_rewind.ksh \ functional/pool_checkpoint/checkpoint_big_rewind.ksh \ functional/pool_checkpoint/checkpoint_capacity.ksh \ functional/pool_checkpoint/checkpoint_conf_change.ksh \ functional/pool_checkpoint/checkpoint_discard_busy.ksh \ functional/pool_checkpoint/checkpoint_discard.ksh \ functional/pool_checkpoint/checkpoint_discard_many.ksh \ functional/pool_checkpoint/checkpoint_indirect.ksh \ functional/pool_checkpoint/checkpoint_invalid.ksh \ functional/pool_checkpoint/checkpoint_lun_expsz.ksh \ functional/pool_checkpoint/checkpoint_open.ksh \ functional/pool_checkpoint/checkpoint_removal.ksh \ functional/pool_checkpoint/checkpoint_rewind.ksh \ functional/pool_checkpoint/checkpoint_ro_rewind.ksh \ functional/pool_checkpoint/checkpoint_sm_scale.ksh \ functional/pool_checkpoint/checkpoint_twice.ksh \ functional/pool_checkpoint/checkpoint_vdev_add.ksh \ functional/pool_checkpoint/checkpoint_zdb.ksh \ functional/pool_checkpoint/checkpoint_zhack_feat.ksh \ functional/pool_checkpoint/cleanup.ksh \ functional/pool_checkpoint/setup.ksh \ functional/pool_names/pool_names_001_pos.ksh \ functional/pool_names/pool_names_002_neg.ksh \ functional/poolversion/cleanup.ksh \ functional/poolversion/poolversion_001_pos.ksh \ functional/poolversion/poolversion_002_pos.ksh \ functional/poolversion/setup.ksh \ functional/privilege/cleanup.ksh \ functional/privilege/privilege_001_pos.ksh \ functional/privilege/privilege_002_pos.ksh \ functional/privilege/setup.ksh \ functional/procfs/cleanup.ksh \ functional/procfs/pool_state.ksh \ functional/procfs/procfs_list_basic.ksh \ functional/procfs/procfs_list_concurrent_readers.ksh \ functional/procfs/procfs_list_stale_read.ksh \ functional/procfs/setup.ksh \ functional/projectquota/cleanup.ksh \ functional/projectquota/projectid_001_pos.ksh \ functional/projectquota/projectid_002_pos.ksh \ functional/projectquota/projectid_003_pos.ksh \ functional/projectquota/projectquota_001_pos.ksh \ functional/projectquota/projectquota_002_pos.ksh \ functional/projectquota/projectquota_003_pos.ksh \ functional/projectquota/projectquota_004_neg.ksh \ functional/projectquota/projectquota_005_pos.ksh \ functional/projectquota/projectquota_006_pos.ksh \ functional/projectquota/projectquota_007_pos.ksh \ functional/projectquota/projectquota_008_pos.ksh \ functional/projectquota/projectquota_009_pos.ksh \ functional/projectquota/projectspace_001_pos.ksh \ functional/projectquota/projectspace_002_pos.ksh \ functional/projectquota/projectspace_003_pos.ksh \ functional/projectquota/projectspace_004_pos.ksh \ functional/projectquota/projecttree_001_pos.ksh \ functional/projectquota/projecttree_002_pos.ksh \ functional/projectquota/projecttree_003_neg.ksh \ functional/projectquota/setup.ksh \ functional/quota/cleanup.ksh \ functional/quota/quota_001_pos.ksh \ functional/quota/quota_002_pos.ksh \ functional/quota/quota_003_pos.ksh \ functional/quota/quota_004_pos.ksh \ functional/quota/quota_005_pos.ksh \ functional/quota/quota_006_neg.ksh \ functional/quota/setup.ksh \ functional/raidz/cleanup.ksh \ functional/raidz/raidz_001_neg.ksh \ functional/raidz/raidz_002_pos.ksh \ functional/raidz/raidz_003_pos.ksh \ functional/raidz/raidz_004_pos.ksh \ functional/raidz/setup.ksh \ functional/redacted_send/cleanup.ksh \ functional/redacted_send/redacted_compressed.ksh \ functional/redacted_send/redacted_contents.ksh \ functional/redacted_send/redacted_deleted.ksh \ functional/redacted_send/redacted_disabled_feature.ksh \ functional/redacted_send/redacted_embedded.ksh \ functional/redacted_send/redacted_holes.ksh \ functional/redacted_send/redacted_incrementals.ksh \ functional/redacted_send/redacted_largeblocks.ksh \ functional/redacted_send/redacted_many_clones.ksh \ functional/redacted_send/redacted_mixed_recsize.ksh \ functional/redacted_send/redacted_mounts.ksh \ functional/redacted_send/redacted_negative.ksh \ functional/redacted_send/redacted_origin.ksh \ functional/redacted_send/redacted_panic.ksh \ functional/redacted_send/redacted_props.ksh \ functional/redacted_send/redacted_resume.ksh \ functional/redacted_send/redacted_size.ksh \ functional/redacted_send/redacted_volume.ksh \ functional/redacted_send/setup.ksh \ functional/redundancy/cleanup.ksh \ functional/redundancy/redundancy_draid1.ksh \ functional/redundancy/redundancy_draid2.ksh \ functional/redundancy/redundancy_draid3.ksh \ functional/redundancy/redundancy_draid_damaged1.ksh \ functional/redundancy/redundancy_draid_damaged2.ksh \ functional/redundancy/redundancy_draid.ksh \ functional/redundancy/redundancy_draid_spare1.ksh \ functional/redundancy/redundancy_draid_spare2.ksh \ functional/redundancy/redundancy_draid_spare3.ksh \ functional/redundancy/redundancy_mirror.ksh \ functional/redundancy/redundancy_raidz1.ksh \ functional/redundancy/redundancy_raidz2.ksh \ functional/redundancy/redundancy_raidz3.ksh \ functional/redundancy/redundancy_raidz.ksh \ functional/redundancy/redundancy_stripe.ksh \ functional/redundancy/setup.ksh \ functional/refquota/cleanup.ksh \ functional/refquota/refquota_001_pos.ksh \ functional/refquota/refquota_002_pos.ksh \ functional/refquota/refquota_003_pos.ksh \ functional/refquota/refquota_004_pos.ksh \ functional/refquota/refquota_005_pos.ksh \ functional/refquota/refquota_006_neg.ksh \ functional/refquota/refquota_007_neg.ksh \ functional/refquota/refquota_008_neg.ksh \ functional/refquota/setup.ksh \ functional/refreserv/cleanup.ksh \ functional/refreserv/refreserv_001_pos.ksh \ functional/refreserv/refreserv_002_pos.ksh \ functional/refreserv/refreserv_003_pos.ksh \ functional/refreserv/refreserv_004_pos.ksh \ functional/refreserv/refreserv_005_pos.ksh \ functional/refreserv/refreserv_multi_raidz.ksh \ functional/refreserv/refreserv_raidz.ksh \ functional/refreserv/setup.ksh \ functional/removal/cleanup.ksh \ functional/removal/removal_all_vdev.ksh \ functional/removal/removal_cancel.ksh \ functional/removal/removal_check_space.ksh \ functional/removal/removal_condense_export.ksh \ functional/removal/removal_multiple_indirection.ksh \ functional/removal/removal_nopwrite.ksh \ functional/removal/removal_remap_deadlists.ksh \ functional/removal/removal_reservation.ksh \ functional/removal/removal_resume_export.ksh \ functional/removal/removal_sanity.ksh \ functional/removal/removal_with_add.ksh \ functional/removal/removal_with_create_fs.ksh \ functional/removal/removal_with_dedup.ksh \ functional/removal/removal_with_errors.ksh \ functional/removal/removal_with_export.ksh \ functional/removal/removal_with_faulted.ksh \ functional/removal/removal_with_ganging.ksh \ functional/removal/removal_with_remove.ksh \ functional/removal/removal_with_scrub.ksh \ functional/removal/removal_with_send.ksh \ functional/removal/removal_with_send_recv.ksh \ functional/removal/removal_with_snapshot.ksh \ functional/removal/removal_with_write.ksh \ functional/removal/removal_with_zdb.ksh \ functional/removal/remove_attach_mirror.ksh \ functional/removal/remove_expanded.ksh \ functional/removal/remove_indirect.ksh \ functional/removal/remove_mirror.ksh \ functional/removal/remove_mirror_sanity.ksh \ functional/removal/remove_raidz.ksh \ functional/rename_dirs/cleanup.ksh \ functional/rename_dirs/rename_dirs_001_pos.ksh \ functional/rename_dirs/setup.ksh \ functional/replacement/attach_import.ksh \ functional/replacement/attach_multiple.ksh \ functional/replacement/attach_rebuild.ksh \ functional/replacement/attach_resilver.ksh \ functional/replacement/cleanup.ksh \ functional/replacement/detach.ksh \ functional/replacement/rebuild_disabled_feature.ksh \ functional/replacement/rebuild_multiple.ksh \ functional/replacement/rebuild_raidz.ksh \ functional/replacement/replace_import.ksh \ functional/replacement/replace_rebuild.ksh \ functional/replacement/replace_resilver.ksh \ functional/replacement/resilver_restart_001.ksh \ functional/replacement/resilver_restart_002.ksh \ functional/replacement/scrub_cancel.ksh \ functional/replacement/setup.ksh \ functional/reservation/cleanup.ksh \ functional/reservation/reservation_001_pos.ksh \ functional/reservation/reservation_002_pos.ksh \ functional/reservation/reservation_003_pos.ksh \ functional/reservation/reservation_004_pos.ksh \ functional/reservation/reservation_005_pos.ksh \ functional/reservation/reservation_006_pos.ksh \ functional/reservation/reservation_007_pos.ksh \ functional/reservation/reservation_008_pos.ksh \ functional/reservation/reservation_009_pos.ksh \ functional/reservation/reservation_010_pos.ksh \ functional/reservation/reservation_011_pos.ksh \ functional/reservation/reservation_012_pos.ksh \ functional/reservation/reservation_013_pos.ksh \ functional/reservation/reservation_014_pos.ksh \ functional/reservation/reservation_015_pos.ksh \ functional/reservation/reservation_016_pos.ksh \ functional/reservation/reservation_017_pos.ksh \ functional/reservation/reservation_018_pos.ksh \ functional/reservation/reservation_019_pos.ksh \ functional/reservation/reservation_020_pos.ksh \ functional/reservation/reservation_021_neg.ksh \ functional/reservation/reservation_022_pos.ksh \ functional/reservation/setup.ksh \ functional/rootpool/cleanup.ksh \ functional/rootpool/rootpool_002_neg.ksh \ functional/rootpool/rootpool_003_neg.ksh \ functional/rootpool/rootpool_007_pos.ksh \ functional/rootpool/setup.ksh \ functional/rsend/cleanup.ksh \ functional/rsend/recv_dedup_encrypted_zvol.ksh \ functional/rsend/recv_dedup.ksh \ functional/rsend/rsend_001_pos.ksh \ functional/rsend/rsend_002_pos.ksh \ functional/rsend/rsend_003_pos.ksh \ functional/rsend/rsend_004_pos.ksh \ functional/rsend/rsend_005_pos.ksh \ functional/rsend/rsend_006_pos.ksh \ functional/rsend/rsend_007_pos.ksh \ functional/rsend/rsend_008_pos.ksh \ functional/rsend/rsend_009_pos.ksh \ functional/rsend/rsend_010_pos.ksh \ functional/rsend/rsend_011_pos.ksh \ functional/rsend/rsend_012_pos.ksh \ functional/rsend/rsend_013_pos.ksh \ functional/rsend/rsend_014_pos.ksh \ functional/rsend/rsend_016_neg.ksh \ functional/rsend/rsend_019_pos.ksh \ functional/rsend/rsend_020_pos.ksh \ functional/rsend/rsend_021_pos.ksh \ functional/rsend/rsend_022_pos.ksh \ functional/rsend/rsend_024_pos.ksh \ functional/rsend/rsend_025_pos.ksh \ functional/rsend/rsend_026_neg.ksh \ functional/rsend/rsend_027_pos.ksh \ functional/rsend/rsend_028_neg.ksh \ functional/rsend/rsend_029_neg.ksh \ functional/rsend/send-c_embedded_blocks.ksh \ functional/rsend/send-c_incremental.ksh \ functional/rsend/send-c_lz4_disabled.ksh \ functional/rsend/send-c_mixed_compression.ksh \ functional/rsend/send-cpL_varied_recsize.ksh \ functional/rsend/send-c_props.ksh \ functional/rsend/send-c_recv_dedup.ksh \ functional/rsend/send-c_recv_lz4_disabled.ksh \ functional/rsend/send-c_resume.ksh \ functional/rsend/send-c_stream_size_estimate.ksh \ functional/rsend/send-c_verify_contents.ksh \ functional/rsend/send-c_verify_ratio.ksh \ functional/rsend/send-c_volume.ksh \ functional/rsend/send-c_zstreamdump.ksh \ functional/rsend/send_doall.ksh \ functional/rsend/send_encrypted_files.ksh \ functional/rsend/send_encrypted_hierarchy.ksh \ functional/rsend/send_encrypted_props.ksh \ functional/rsend/send_encrypted_truncated_files.ksh \ functional/rsend/send_freeobjects.ksh \ functional/rsend/send_holds.ksh \ functional/rsend/send_hole_birth.ksh \ functional/rsend/send_invalid.ksh \ functional/rsend/send-L_toggle.ksh \ functional/rsend/send_mixed_raw.ksh \ functional/rsend/send_partial_dataset.ksh \ functional/rsend/send_raw_ashift.ksh \ functional/rsend/send_raw_spill_block.ksh \ functional/rsend/send_realloc_dnode_size.ksh \ functional/rsend/send_realloc_encrypted_files.ksh \ functional/rsend/send_realloc_files.ksh \ functional/rsend/send_spill_block.ksh \ functional/rsend/send-wR_encrypted_zvol.ksh \ functional/rsend/setup.ksh \ functional/scrub_mirror/cleanup.ksh \ functional/scrub_mirror/scrub_mirror_001_pos.ksh \ functional/scrub_mirror/scrub_mirror_002_pos.ksh \ functional/scrub_mirror/scrub_mirror_003_pos.ksh \ functional/scrub_mirror/scrub_mirror_004_pos.ksh \ functional/scrub_mirror/setup.ksh \ functional/slog/cleanup.ksh \ functional/slog/setup.ksh \ functional/slog/slog_001_pos.ksh \ functional/slog/slog_002_pos.ksh \ functional/slog/slog_003_pos.ksh \ functional/slog/slog_004_pos.ksh \ functional/slog/slog_005_pos.ksh \ functional/slog/slog_006_pos.ksh \ functional/slog/slog_007_pos.ksh \ functional/slog/slog_008_neg.ksh \ functional/slog/slog_009_neg.ksh \ functional/slog/slog_010_neg.ksh \ functional/slog/slog_011_neg.ksh \ functional/slog/slog_012_neg.ksh \ functional/slog/slog_013_pos.ksh \ functional/slog/slog_014_pos.ksh \ functional/slog/slog_015_neg.ksh \ functional/slog/slog_016_pos.ksh \ functional/slog/slog_replay_fs_001.ksh \ functional/slog/slog_replay_fs_002.ksh \ functional/slog/slog_replay_volume.ksh \ functional/snapshot/cleanup.ksh \ functional/snapshot/clone_001_pos.ksh \ functional/snapshot/rollback_001_pos.ksh \ functional/snapshot/rollback_002_pos.ksh \ functional/snapshot/rollback_003_pos.ksh \ functional/snapshot/setup.ksh \ functional/snapshot/snapshot_001_pos.ksh \ functional/snapshot/snapshot_002_pos.ksh \ functional/snapshot/snapshot_003_pos.ksh \ functional/snapshot/snapshot_004_pos.ksh \ functional/snapshot/snapshot_005_pos.ksh \ functional/snapshot/snapshot_006_pos.ksh \ functional/snapshot/snapshot_007_pos.ksh \ functional/snapshot/snapshot_008_pos.ksh \ functional/snapshot/snapshot_009_pos.ksh \ functional/snapshot/snapshot_010_pos.ksh \ functional/snapshot/snapshot_011_pos.ksh \ functional/snapshot/snapshot_012_pos.ksh \ functional/snapshot/snapshot_013_pos.ksh \ functional/snapshot/snapshot_014_pos.ksh \ functional/snapshot/snapshot_015_pos.ksh \ functional/snapshot/snapshot_016_pos.ksh \ functional/snapshot/snapshot_017_pos.ksh \ functional/snapshot/snapshot_018_pos.ksh \ functional/snapused/cleanup.ksh \ functional/snapused/setup.ksh \ functional/snapused/snapused_001_pos.ksh \ functional/snapused/snapused_002_pos.ksh \ functional/snapused/snapused_003_pos.ksh \ functional/snapused/snapused_004_pos.ksh \ functional/snapused/snapused_005_pos.ksh \ functional/sparse/cleanup.ksh \ functional/sparse/setup.ksh \ functional/sparse/sparse_001_pos.ksh \ functional/stat/cleanup.ksh \ functional/stat/setup.ksh \ functional/stat/stat_001_pos.ksh \ functional/suid/cleanup.ksh \ functional/suid/setup.ksh \ functional/suid/suid_write_to_none.ksh \ functional/suid/suid_write_to_sgid.ksh \ functional/suid/suid_write_to_suid.ksh \ functional/suid/suid_write_to_suid_sgid.ksh \ functional/suid/suid_write_zil_replay.ksh \ functional/trim/autotrim_config.ksh \ functional/trim/autotrim_integrity.ksh \ functional/trim/autotrim_trim_integrity.ksh \ functional/trim/cleanup.ksh \ functional/trim/setup.ksh \ functional/trim/trim_config.ksh \ functional/trim/trim_integrity.ksh \ functional/trim/trim_l2arc.ksh \ functional/truncate/cleanup.ksh \ functional/truncate/setup.ksh \ functional/truncate/truncate_001_pos.ksh \ functional/truncate/truncate_002_pos.ksh \ functional/truncate/truncate_timestamps.ksh \ functional/upgrade/cleanup.ksh \ functional/upgrade/setup.ksh \ functional/upgrade/upgrade_projectquota_001_pos.ksh \ functional/upgrade/upgrade_readonly_pool.ksh \ functional/upgrade/upgrade_userobj_001_pos.ksh \ functional/user_namespace/cleanup.ksh \ functional/user_namespace/setup.ksh \ functional/user_namespace/user_namespace_001.ksh \ functional/user_namespace/user_namespace_002.ksh \ functional/user_namespace/user_namespace_003.ksh \ functional/user_namespace/user_namespace_004.ksh \ functional/userquota/cleanup.ksh \ functional/userquota/groupspace_001_pos.ksh \ functional/userquota/groupspace_002_pos.ksh \ functional/userquota/groupspace_003_pos.ksh \ functional/userquota/setup.ksh \ functional/userquota/userquota_001_pos.ksh \ functional/userquota/userquota_002_pos.ksh \ functional/userquota/userquota_003_pos.ksh \ functional/userquota/userquota_004_pos.ksh \ functional/userquota/userquota_005_neg.ksh \ functional/userquota/userquota_006_pos.ksh \ functional/userquota/userquota_007_pos.ksh \ functional/userquota/userquota_008_pos.ksh \ functional/userquota/userquota_009_pos.ksh \ functional/userquota/userquota_010_pos.ksh \ functional/userquota/userquota_011_pos.ksh \ functional/userquota/userquota_012_neg.ksh \ functional/userquota/userquota_013_pos.ksh \ functional/userquota/userspace_001_pos.ksh \ functional/userquota/userspace_002_pos.ksh \ functional/userquota/userspace_003_pos.ksh \ functional/userquota/userspace_encrypted.ksh \ functional/userquota/userspace_send_encrypted.ksh \ functional/vdev_zaps/cleanup.ksh \ functional/vdev_zaps/setup.ksh \ functional/vdev_zaps/vdev_zaps_001_pos.ksh \ functional/vdev_zaps/vdev_zaps_002_pos.ksh \ functional/vdev_zaps/vdev_zaps_003_pos.ksh \ functional/vdev_zaps/vdev_zaps_004_pos.ksh \ functional/vdev_zaps/vdev_zaps_005_pos.ksh \ functional/vdev_zaps/vdev_zaps_006_pos.ksh \ functional/vdev_zaps/vdev_zaps_007_pos.ksh \ functional/write_dirs/cleanup.ksh \ functional/write_dirs/setup.ksh \ functional/write_dirs/write_dirs_001_pos.ksh \ functional/write_dirs/write_dirs_002_pos.ksh \ functional/xattr/cleanup.ksh \ functional/xattr/setup.ksh \ functional/xattr/xattr_001_pos.ksh \ functional/xattr/xattr_002_neg.ksh \ functional/xattr/xattr_003_neg.ksh \ functional/xattr/xattr_004_pos.ksh \ functional/xattr/xattr_005_pos.ksh \ functional/xattr/xattr_006_pos.ksh \ functional/xattr/xattr_007_neg.ksh \ functional/xattr/xattr_008_pos.ksh \ functional/xattr/xattr_009_neg.ksh \ functional/xattr/xattr_010_neg.ksh \ functional/xattr/xattr_011_pos.ksh \ functional/xattr/xattr_012_pos.ksh \ functional/xattr/xattr_013_pos.ksh \ functional/xattr/xattr_compat.ksh \ functional/zpool_influxdb/cleanup.ksh \ functional/zpool_influxdb/setup.ksh \ functional/zpool_influxdb/zpool_influxdb.ksh \ functional/zvol/zvol_cli/cleanup.ksh \ functional/zvol/zvol_cli/setup.ksh \ functional/zvol/zvol_cli/zvol_cli_001_pos.ksh \ functional/zvol/zvol_cli/zvol_cli_002_pos.ksh \ functional/zvol/zvol_cli/zvol_cli_003_neg.ksh \ functional/zvol/zvol_ENOSPC/cleanup.ksh \ functional/zvol/zvol_ENOSPC/setup.ksh \ functional/zvol/zvol_ENOSPC/zvol_ENOSPC_001_pos.ksh \ functional/zvol/zvol_misc/cleanup.ksh \ functional/zvol/zvol_misc/setup.ksh \ functional/zvol/zvol_misc/zvol_misc_001_neg.ksh \ functional/zvol/zvol_misc/zvol_misc_002_pos.ksh \ functional/zvol/zvol_misc/zvol_misc_003_neg.ksh \ functional/zvol/zvol_misc/zvol_misc_004_pos.ksh \ functional/zvol/zvol_misc/zvol_misc_005_neg.ksh \ functional/zvol/zvol_misc/zvol_misc_006_pos.ksh \ functional/zvol/zvol_misc/zvol_misc_fua.ksh \ functional/zvol/zvol_misc/zvol_misc_hierarchy.ksh \ functional/zvol/zvol_misc/zvol_misc_rename_inuse.ksh \ functional/zvol/zvol_misc/zvol_misc_snapdev.ksh \ functional/zvol/zvol_misc/zvol_misc_trim.ksh \ functional/zvol/zvol_misc/zvol_misc_volmode.ksh \ functional/zvol/zvol_misc/zvol_misc_zil.ksh \ functional/zvol/zvol_stress/cleanup.ksh \ functional/zvol/zvol_stress/setup.ksh \ functional/zvol/zvol_stress/zvol_stress.ksh \ functional/zvol/zvol_swap/cleanup.ksh \ functional/zvol/zvol_swap/setup.ksh \ functional/zvol/zvol_swap/zvol_swap_001_pos.ksh \ functional/zvol/zvol_swap/zvol_swap_002_pos.ksh \ functional/zvol/zvol_swap/zvol_swap_003_pos.ksh \ functional/zvol/zvol_swap/zvol_swap_004_pos.ksh \ functional/zvol/zvol_swap/zvol_swap_005_pos.ksh \ functional/zvol/zvol_swap/zvol_swap_006_pos.ksh diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/atime_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/atime_001_pos.ksh index 136bb6b11c2c..9006afbeed97 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/atime_001_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/atime_001_pos.ksh @@ -1,75 +1,73 @@ #!/bin/ksh -p # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or https://opensource.org/licenses/CDDL-1.0. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # # Copyright (c) 2016 by Delphix. All rights reserved. # . $STF_SUITE/tests/functional/atime/atime_common.kshlib # # DESCRIPTION: # When atime=on, verify the access time for files is updated when read. It # is available to fs and clone. To snapshot, it is unavailable. # # STRATEGY: # 1. Create pool and fs. # 2. Create '$TESTFILE' for fs. # 3. Create snapshot and clone. # 4. Setting atime=on on datasets except snapshot, and read '$TESTFILE'. # 5. Expect the access time is updated on datasets except snapshot. # verify_runnable "both" log_assert "Setting atime=on, the access time for files is updated when read." log_onexit cleanup # # Create $TESTFILE, snapshot and clone. # setup_snap_clone for dst in $TESTPOOL/$TESTFS $TESTPOOL/$TESTCLONE $TESTPOOL/$TESTFS@$TESTSNAP do typeset mtpt=$(get_prop mountpoint $dst) if [[ $dst == $TESTPOOL/$TESTFS@$TESTSNAP ]]; then mtpt=$(snapshot_mountpoint $dst) log_mustnot check_atime_updated $mtpt/$TESTFILE else - if is_linux; then - log_must zfs set relatime=off $dst - fi - log_must zfs set atime=on $dst + log_must zfs set relatime=off $dst + log_must check_atime_updated $mtpt/$TESTFILE log_must check_atime_updated $mtpt/$TESTFILE fi done log_pass "Verify the property atime=on passed." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/atime_003_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/atime_003_pos.ksh index f5c1044dcc8e..28d973c71f83 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/atime_003_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/atime_003_pos.ksh @@ -1,70 +1,71 @@ #!/bin/ksh -p # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or https://opensource.org/licenses/CDDL-1.0. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # . $STF_SUITE/tests/functional/atime/atime_common.kshlib # # DESCRIPTION: # When relatime=on, verify the access time for files is updated when first # read but not on second. # It is available to fs and clone. To snapshot, it is unavailable. # # STRATEGY: # 1. Create pool and fs. # 2. Create '$TESTFILE' for fs. # 3. Create snapshot and clone. # 4. Setting atime=on and relatime=on on datasets. # 5. Expect the access time is updated for first read but not on second. # verify_runnable "both" log_assert "Setting relatime=on, the access time for files is updated when \ when read the first time, but not second time." log_onexit cleanup # # Create $TESTFILE, snapshot and clone. # setup_snap_clone for dst in $TESTPOOL/$TESTFS $TESTPOOL/$TESTCLONE $TESTPOOL/$TESTFS@$TESTSNAP do typeset mtpt=$(get_prop mountpoint $dst) if [[ $dst == $TESTPOOL/$TESTFS@$TESTSNAP ]]; then mtpt=$(snapshot_mountpoint $dst) log_mustnot check_atime_updated $mtpt/$TESTFILE else log_must zfs set atime=on $dst log_must zfs set relatime=on $dst + log_must check_atime_updated $mtpt/$TESTFILE log_mustnot check_atime_updated $mtpt/$TESTFILE fi done log_pass "Verify the property relatime=on passed." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/root_atime_on.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/root_atime_on.ksh index 1e3b543a6d42..3b6a0d9004ce 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/root_atime_on.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/root_atime_on.ksh @@ -1,78 +1,77 @@ #!/bin/ksh -p # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or https://opensource.org/licenses/CDDL-1.0. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # # Copyright (c) 2016 by Delphix. All rights reserved. # Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. # . $STF_SUITE/tests/functional/atime/atime_common.kshlib # # DESCRIPTION: # When atime=on, verify the access time for files is updated when read. It # is available to fs and clone. To snapshot, it is unavailable. # # STRATEGY: # 1. Create pool and fs. # 2. Create '$TESTFILE' for fs. # 3. Create snapshot and clone. # 4. Setting atime=on on datasets except snapshot, and read '$TESTFILE'. # 5. Expect the access time is updated on datasets except snapshot. # verify_runnable "both" log_assert "Setting atime=on, the access time for files is updated when read." log_onexit cleanup # # Create $TESTFILE, snapshot and clone. # Same as 001 except that atime/relatime applies to root dataset (OpenZFS#8675). # setup_snap_clone reset_atime for dst in $TESTPOOL/$TESTFS $TESTPOOL/$TESTCLONE $TESTPOOL/$TESTFS@$TESTSNAP do typeset mtpt=$(get_prop mountpoint $dst) if [[ $dst == $TESTPOOL/$TESTFS@$TESTSNAP ]]; then mtpt=$(snapshot_mountpoint $dst) log_mustnot check_atime_updated $mtpt/$TESTFILE else - if is_linux; then - log_must zfs set relatime=off $(dirname $dst) - fi - log_must zfs set atime=on $(dirname $dst) + # inherited relatime won't apply because of mount option, set explicitly + log_must zfs set relatime=off $dst + log_must check_atime_updated $mtpt/$TESTFILE log_must check_atime_updated $mtpt/$TESTFILE fi done log_pass "Verify the property atime=on passed." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/root_relatime_on.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/root_relatime_on.ksh index 36dbaaee7793..062325938ef2 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/root_relatime_on.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/atime/root_relatime_on.ksh @@ -1,76 +1,77 @@ #!/bin/ksh -p # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or https://opensource.org/licenses/CDDL-1.0. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # # Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. # . $STF_SUITE/tests/functional/atime/atime_common.kshlib # # DESCRIPTION: # When relatime=on, verify the access time for files is updated when first # read but not on second. # It is available to fs and clone. To snapshot, it is unavailable. # # STRATEGY: # 1. Create pool and fs. # 2. Create '$TESTFILE' for fs. # 3. Create snapshot and clone. # 4. Setting atime=on and relatime=on on datasets. # 5. Expect the access time is updated for first read but not on second. # verify_runnable "both" log_assert "Setting relatime=on, the access time for files is updated when \ when read the first time, but not second time." log_onexit cleanup # # Create $TESTFILE, snapshot and clone. # Same as 003 except that atime/relatime applies to root dataset (OpenZFS#8675). # setup_snap_clone reset_atime for dst in $TESTPOOL/$TESTFS $TESTPOOL/$TESTCLONE $TESTPOOL/$TESTFS@$TESTSNAP do typeset mtpt=$(get_prop mountpoint $dst) if [[ $dst == $TESTPOOL/$TESTFS@$TESTSNAP ]]; then mtpt=$(snapshot_mountpoint $dst) log_mustnot check_atime_updated $mtpt/$TESTFILE else log_must zfs set atime=on $(dirname $dst) log_must zfs set relatime=on $(dirname $dst) + log_must check_atime_updated $mtpt/$TESTFILE log_mustnot check_atime_updated $mtpt/$TESTFILE fi done log_pass "Verify the property relatime=on passed." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh index be250158bf7e..74caa12a9cc4 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh @@ -1,112 +1,115 @@ #!/bin/ksh -p # # CDDL HEADER START # # This file and its contents are supplied under the terms of the # Common Development and Distribution License ("CDDL"), version 1.0. # You may only use this file in accordance with the terms of version # 1.0 of the CDDL. # # A full copy of the text of the CDDL should have accompanied this # source. A copy of the CDDL is also available via the Internet at # http://www.illumos.org/license/CDDL. # # CDDL HEADER END # # # Copyright (c) 2020, George Amanakis. All rights reserved. # . $STF_SUITE/tests/functional/cache/cache.cfg . $STF_SUITE/tests/functional/cache/cache.kshlib # # DESCRIPTION: # Looping around a cache device with l2arc_write_size exceeding # the device size succeeds. # # STRATEGY: # 1. Create pool with a cache device. # 2. Set l2arc_write_max to a value larger than the cache device. # 3. Create a file larger than the cache device and random read # for 10 sec. # 4. Verify that l2arc_write_max is set back to the default. # 5. Set l2arc_write_max to a value less than the cache device size but -# larger than the default (64MB). +# larger than the default (256MB). # 6. Record the l2_size. # 7. Random read for 1 sec. # 8. Record the l2_size again. # 9. If (6) <= (8) then we have not looped around yet. # 10. If (6) > (8) then we looped around. Break out of the loop and test. # 11. Destroy pool. # verify_runnable "global" command -v fio > /dev/null || log_unsupported "fio missing" log_assert "Looping around a cache device succeeds." function cleanup { if poolexists $TESTPOOL ; then destroy_pool $TESTPOOL fi log_must set_tunable32 L2ARC_WRITE_MAX $write_max log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch } log_onexit cleanup typeset write_max=$(get_tunable L2ARC_WRITE_MAX) typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH) log_must set_tunable32 L2ARC_NOPREFETCH 0 typeset VDEV="$VDIR/vdev.disk" typeset VDEV_SZ=$(( 4 * 1024 * 1024 * 1024 )) typeset VCACHE="$VDIR/vdev.cache" typeset VCACHE_SZ=$(( $VDEV_SZ / 2 )) typeset fill_mb=$(( floor($VDEV_SZ * 3 / 4 ) )) export DIRECTORY=/$TESTPOOL export NUMJOBS=4 export RUNTIME=10 export PERF_RANDSEED=1234 export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 export DIRECT=1 export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) )) log_must set_tunable32 L2ARC_WRITE_MAX $(( $VCACHE_SZ * 2 )) log_must truncate -s $VCACHE_SZ $VCACHE log_must truncate -s $VDEV_SZ $VDEV log_must zpool create -f $TESTPOOL $VDEV cache $VCACHE +# Actually, this test relies on atime writes to force the L2 ARC discards +log_must zfs set relatime=off $TESTPOOL + log_must fio $FIO_SCRIPTS/mkfiles.fio log_must fio $FIO_SCRIPTS/random_reads.fio typeset write_max2=$(get_tunable L2ARC_WRITE_MAX) log_must test $write_max2 -eq $write_max -log_must set_tunable32 L2ARC_WRITE_MAX $(( 64 * 1024 * 1024 )) +log_must set_tunable32 L2ARC_WRITE_MAX $(( 256 * 1024 * 1024 )) export RUNTIME=1 typeset do_once=true while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do typeset l2_size1=$(get_arcstat l2_size) log_must fio $FIO_SCRIPTS/random_reads.fio typeset l2_size2=$(get_arcstat l2_size) do_once=false done log_must test $l2_size1 -gt $l2_size2 log_must zpool destroy $TESTPOOL log_pass "Looping around a cache device succeeds." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh index 231c46d4fddf..a243944e48ea 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh @@ -1,70 +1,71 @@ #!/bin/ksh -p # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or https://opensource.org/licenses/CDDL-1.0. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright (c) 2019, Delphix. All rights reserved. # Copyright (c) 2021, George Amanakis. All rights reserved. # . $STF_SUITE/include/libtest.shlib # # DESCRIPTION: # Verify correct output with 'zpool status -v' after corrupting a file # # STRATEGY: # 1. Create a pool and a file # 2. zinject checksum errors # 3. Read the file # 4. Take a snapshot and make a clone # 5. Verify we see "snapshot, clone and filesystem" output in 'zpool status -v' function cleanup { log_must zinject -c all datasetexists $TESTPOOL2 && log_must zpool destroy $TESTPOOL2 rm -f $TESTDIR/vdev_a } verify_runnable "both" log_assert "Verify correct 'zpool status -v' output with a corrupted file" log_onexit cleanup truncate -s $MINVDEVSIZE $TESTDIR/vdev_a log_must zpool create -f $TESTPOOL2 $TESTDIR/vdev_a log_must fio --rw=write --name=job --size=10M --filename=/$TESTPOOL2/10m_file log_must zinject -t data -e checksum -f 100 -am /$TESTPOOL2/10m_file # Try to read the 2nd megabyte of 10m_file dd if=/$TESTPOOL2/10m_file bs=1M || true log_must zfs snapshot $TESTPOOL2@snap log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone # Look to see that snapshot, clone and filesystem our files report errors +log_must zpool status -v $TESTPOOL2 log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" log_pass "'zpool status -v' outputs affected filesystem, snapshot & clone" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh index 1ac3e03b710d..111d598dfb7d 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh @@ -1,81 +1,82 @@ #!/bin/ksh -p # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or https://opensource.org/licenses/CDDL-1.0. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright (c) 2019, by Delphix. All rights reserved. # Copyright (c) 2021, George Amanakis. All rights reserved. # . $STF_SUITE/include/libtest.shlib # # DESCRIPTION: # Verify feature@head_errlog=disabled works. # # STRATEGY: # 1. Create a pool with feature@head_errlog=disabled and a file # 2. zinject checksum errors # 3. Read the file # 4. Take a snapshot and make a clone # 5. Verify that zpool status displays the old behaviour. function cleanup { log_must zinject -c all datasetexists $TESTPOOL2 && log_must zpool destroy $TESTPOOL2 rm -f $TESTDIR/vdev_a } verify_runnable "both" log_assert "Verify 'zpool status -v' with feature@head_errlog=disabled works" log_onexit cleanup truncate -s $MINVDEVSIZE $TESTDIR/vdev_a log_must zpool create -f -o feature@head_errlog=disabled $TESTPOOL2 $TESTDIR/vdev_a state=$(zpool list -Ho feature@head_errlog $TESTPOOL2) if [[ "$state" != "disabled" ]]; then log_fail "head_errlog has state $state" fi log_must fio --rw=write --name=job --size=10M --filename=/$TESTPOOL2/10m_file log_must zinject -t data -e checksum -f 100 -am /$TESTPOOL2/10m_file # Try to read the file dd if=/$TESTPOOL2/10m_file bs=1M || true log_must zfs snapshot $TESTPOOL2@snap log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone # Check that snapshot and clone do not report the error. log_mustnot eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" # Check that enabling the feature reports the error properly. log_must zpool set feature@head_errlog=enabled $TESTPOOL2 +log_must zpool status -v $TESTPOOL2 log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" log_pass "'zpool status -v' with feature@head_errlog=disabled works" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh new file mode 100755 index 000000000000..3eb7825ca295 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 George Amanakis. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +# +# DESCRIPTION: +# Verify correct output with 'zpool status -v' after corrupting a file +# +# STRATEGY: +# 1. Create a pool, an ancrypted filesystem and a file +# 2. zinject checksum errors +# 3. Unmount the filesystem and unload the key +# 4. Scrub the pool +# 5. Verify we report errors in the pool in 'zpool status -v' + +verify_runnable "both" + +DISK=${DISKS%% *} + +function cleanup +{ + log_must zinject -c all + destroy_pool $TESTPOOL2 + rm -f $TESTDIR/vdev_a +} + +log_assert "Verify reporting errors with unloaded keys works" +log_onexit cleanup + +typeset passphrase="password" +typeset file="/$TESTPOOL2/$TESTFS1/$TESTFILE0" + +truncate -s $MINVDEVSIZE $TESTDIR/vdev_a +log_must zpool create -f -o feature@head_errlog=enabled $TESTPOOL2 $TESTDIR/vdev_a + +log_must eval "echo $passphrase > /$TESTPOOL2/pwd" + +log_must zfs create -o encryption=aes-256-ccm -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL2/pwd -o primarycache=none \ + $TESTPOOL2/$TESTFS1 + +log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync +log_must eval "echo 'aaaaaaaa' >> "$file + +corrupt_blocks_at_level $file 0 +log_must zfs unmount $TESTPOOL2/$TESTFS1 +log_must zfs unload-key $TESTPOOL2/$TESTFS1 +log_must zpool sync $TESTPOOL2 +log_must zpool scrub $TESTPOOL2 +log_must zpool wait -t scrub $TESTPOOL2 +log_must zpool status -v $TESTPOOL2 +log_must eval "zpool status -v $TESTPOOL2 | \ + grep \"Permanent errors have been detected\"" + +log_pass "Verify reporting errors with unloaded keys works" diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 9cc657919f6a..88cb658c50eb 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -1,979 +1,979 @@ /* * $FreeBSD$ */ /* zfs_config.h. Generated from zfs_config.h.in by configure. */ /* zfs_config.h.in. Generated from configure.ac by autoheader. */ /* Define to 1 if translation of program messages to the user's native language is requested. */ /* #undef ENABLE_NLS */ /* bio_end_io_t wants 1 arg */ /* #undef HAVE_1ARG_BIO_END_IO_T */ /* lookup_bdev() wants 1 arg */ /* #undef HAVE_1ARG_LOOKUP_BDEV */ /* submit_bio() wants 1 arg */ /* #undef HAVE_1ARG_SUBMIT_BIO */ /* bdi_setup_and_register() wants 2 args */ /* #undef HAVE_2ARGS_BDI_SETUP_AND_REGISTER */ /* vfs_getattr wants 2 args */ /* #undef HAVE_2ARGS_VFS_GETATTR */ /* zlib_deflate_workspacesize() wants 2 args */ /* #undef HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE */ /* bdi_setup_and_register() wants 3 args */ /* #undef HAVE_3ARGS_BDI_SETUP_AND_REGISTER */ /* vfs_getattr wants 3 args */ /* #undef HAVE_3ARGS_VFS_GETATTR */ /* vfs_getattr wants 4 args */ /* #undef HAVE_4ARGS_VFS_GETATTR */ /* kernel has access_ok with 'type' parameter */ /* #undef HAVE_ACCESS_OK_TYPE */ /* posix_acl has refcount_t */ /* #undef HAVE_ACL_REFCOUNT */ /* add_disk() returns int */ /* #undef HAVE_ADD_DISK_RET */ /* Define if host toolchain supports AES */ #define HAVE_AES 1 /* Define if you have [rt] */ #define HAVE_AIO_H 1 #ifdef __amd64__ #ifndef RESCUE /* Define if host toolchain supports AVX */ #define HAVE_AVX 1 #endif /* Define if host toolchain supports AVX2 */ #define HAVE_AVX2 1 /* Define if host toolchain supports AVX512BW */ #define HAVE_AVX512BW 1 /* Define if host toolchain supports AVX512CD */ #define HAVE_AVX512CD 1 /* Define if host toolchain supports AVX512DQ */ #define HAVE_AVX512DQ 1 /* Define if host toolchain supports AVX512ER */ #define HAVE_AVX512ER 1 /* Define if host toolchain supports AVX512F */ #define HAVE_AVX512F 1 /* Define if host toolchain supports AVX512IFMA */ #define HAVE_AVX512IFMA 1 /* Define if host toolchain supports AVX512PF */ #define HAVE_AVX512PF 1 /* Define if host toolchain supports AVX512VBMI */ #define HAVE_AVX512VBMI 1 /* Define if host toolchain supports AVX512VL */ #define HAVE_AVX512VL 1 #endif /* bdevname() is available */ /* #undef HAVE_BDEVNAME */ /* bdev_check_media_change() exists */ /* #undef HAVE_BDEV_CHECK_MEDIA_CHANGE */ /* bdev_*_io_acct() available */ /* #undef HAVE_BDEV_IO_ACCT */ /* bdev_max_discard_sectors() is available */ /* #undef HAVE_BDEV_MAX_DISCARD_SECTORS */ /* bdev_max_secure_erase_sectors() is available */ /* #undef HAVE_BDEV_MAX_SECURE_ERASE_SECTORS */ /* block_device_operations->submit_bio() returns void */ /* #undef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID */ /* bdev_whole() is available */ /* #undef HAVE_BDEV_WHOLE */ /* bio_alloc() takes 4 arguments */ /* #undef HAVE_BIO_ALLOC_4ARG */ /* bio->bi_bdev->bd_disk exists */ /* #undef HAVE_BIO_BDEV_DISK */ /* bio->bi_opf is defined */ /* #undef HAVE_BIO_BI_OPF */ /* bio->bi_status exists */ /* #undef HAVE_BIO_BI_STATUS */ /* bio has bi_iter */ /* #undef HAVE_BIO_BVEC_ITER */ /* bio_*_io_acct() available */ /* #undef HAVE_BIO_IO_ACCT */ /* bio_max_segs() is implemented */ /* #undef HAVE_BIO_MAX_SEGS */ /* bio_set_dev() is available */ /* #undef HAVE_BIO_SET_DEV */ /* bio_set_dev() GPL-only */ /* #undef HAVE_BIO_SET_DEV_GPL_ONLY */ /* bio_set_dev() is a macro */ /* #undef HAVE_BIO_SET_DEV_MACRO */ /* bio_set_op_attrs is available */ /* #undef HAVE_BIO_SET_OP_ATTRS */ /* blkdev_get_by_path() handles ERESTARTSYS */ /* #undef HAVE_BLKDEV_GET_ERESTARTSYS */ /* blkdev_issue_discard() is available */ /* #undef HAVE_BLKDEV_ISSUE_DISCARD */ /* blkdev_issue_secure_erase() is available */ /* #undef HAVE_BLKDEV_ISSUE_SECURE_ERASE */ /* blkdev_reread_part() exists */ /* #undef HAVE_BLKDEV_REREAD_PART */ /* blkg_tryget() is available */ /* #undef HAVE_BLKG_TRYGET */ /* blkg_tryget() GPL-only */ /* #undef HAVE_BLKG_TRYGET_GPL_ONLY */ /* blk_alloc_disk() exists */ /* #undef HAVE_BLK_ALLOC_DISK */ /* blk_alloc_queue() expects request function */ /* #undef HAVE_BLK_ALLOC_QUEUE_REQUEST_FN */ /* blk_alloc_queue_rh() expects request function */ /* #undef HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH */ /* blk_cleanup_disk() exists */ /* #undef HAVE_BLK_CLEANUP_DISK */ /* block multiqueue is available */ /* #undef HAVE_BLK_MQ */ /* blk queue backing_dev_info is dynamic */ /* #undef HAVE_BLK_QUEUE_BDI_DYNAMIC */ /* blk_queue_discard() is available */ /* #undef HAVE_BLK_QUEUE_DISCARD */ /* blk_queue_flag_clear() exists */ /* #undef HAVE_BLK_QUEUE_FLAG_CLEAR */ /* blk_queue_flag_set() exists */ /* #undef HAVE_BLK_QUEUE_FLAG_SET */ /* blk_queue_flush() is available */ /* #undef HAVE_BLK_QUEUE_FLUSH */ /* blk_queue_flush() is GPL-only */ /* #undef HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */ /* blk_queue_secdiscard() is available */ /* #undef HAVE_BLK_QUEUE_SECDISCARD */ /* blk_queue_secure_erase() is available */ /* #undef HAVE_BLK_QUEUE_SECURE_ERASE */ /* blk_queue_update_readahead() exists */ /* #undef HAVE_BLK_QUEUE_UPDATE_READAHEAD */ /* blk_queue_write_cache() exists */ /* #undef HAVE_BLK_QUEUE_WRITE_CACHE */ /* blk_queue_write_cache() is GPL-only */ /* #undef HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY */ /* Define if revalidate_disk() in block_device_operations */ /* #undef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK */ /* Define to 1 if you have the Mac OS X function CFLocaleCopyCurrent in the CoreFoundation framework. */ /* #undef HAVE_CFLOCALECOPYCURRENT */ /* Define to 1 if you have the Mac OS X function CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */ /* #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES */ /* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in the CoreFoundation framework. */ /* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */ /* check_disk_change() exists */ /* #undef HAVE_CHECK_DISK_CHANGE */ /* clear_inode() is available */ /* #undef HAVE_CLEAR_INODE */ /* dentry uses const struct dentry_operations */ /* #undef HAVE_CONST_DENTRY_OPERATIONS */ /* copy_from_iter() is available */ /* #undef HAVE_COPY_FROM_ITER */ /* copy_to_iter() is available */ /* #undef HAVE_COPY_TO_ITER */ /* yes */ /* #undef HAVE_CPU_HOTPLUG */ /* current_time() exists */ /* #undef HAVE_CURRENT_TIME */ /* Define if the GNU dcgettext() function is already present or preinstalled. */ /* #undef HAVE_DCGETTEXT */ /* DECLARE_EVENT_CLASS() is available */ /* #undef HAVE_DECLARE_EVENT_CLASS */ /* dequeue_signal() takes 4 arguments */ /* #undef HAVE_DEQUEUE_SIGNAL_4ARG */ /* lookup_bdev() wants dev_t arg */ /* #undef HAVE_DEVT_LOOKUP_BDEV */ /* sops->dirty_inode() wants flags */ /* #undef HAVE_DIRTY_INODE_WITH_FLAGS */ /* disk_*_io_acct() available */ /* #undef HAVE_DISK_IO_ACCT */ /* disk_update_readahead() exists */ /* #undef HAVE_DISK_UPDATE_READAHEAD */ /* Define to 1 if you have the header file. */ #define HAVE_DLFCN_H 1 /* d_make_root() is available */ /* #undef HAVE_D_MAKE_ROOT */ /* d_prune_aliases() is available */ /* #undef HAVE_D_PRUNE_ALIASES */ /* dops->d_revalidate() operation takes nameidata */ /* #undef HAVE_D_REVALIDATE_NAMEIDATA */ /* eops->encode_fh() wants child and parent inodes */ /* #undef HAVE_ENCODE_FH_WITH_INODE */ /* sops->evict_inode() exists */ /* #undef HAVE_EVICT_INODE */ /* FALLOC_FL_ZERO_RANGE is defined */ /* #undef HAVE_FALLOC_FL_ZERO_RANGE */ /* fault_in_iov_iter_readable() is available */ /* #undef HAVE_FAULT_IN_IOV_ITER_READABLE */ /* fops->aio_fsync() exists */ /* #undef HAVE_FILE_AIO_FSYNC */ /* file_dentry() is available */ /* #undef HAVE_FILE_DENTRY */ /* file_inode() is available */ /* #undef HAVE_FILE_INODE */ /* iops->follow_link() cookie */ /* #undef HAVE_FOLLOW_LINK_COOKIE */ /* iops->follow_link() nameidata */ /* #undef HAVE_FOLLOW_LINK_NAMEIDATA */ /* fops->fsync() with range */ /* #undef HAVE_FSYNC_RANGE */ /* fops->fsync() without dentry */ /* #undef HAVE_FSYNC_WITHOUT_DENTRY */ /* generic_fillattr requires struct user_namespace* */ /* #undef HAVE_GENERIC_FILLATTR_USERNS */ /* generic_*_io_acct() 3 arg available */ /* #undef HAVE_GENERIC_IO_ACCT_3ARG */ /* generic_*_io_acct() 4 arg available */ /* #undef HAVE_GENERIC_IO_ACCT_4ARG */ /* generic_readlink is global */ /* #undef HAVE_GENERIC_READLINK */ /* generic_setxattr() exists */ /* #undef HAVE_GENERIC_SETXATTR */ /* generic_write_checks() takes kiocb */ /* #undef HAVE_GENERIC_WRITE_CHECKS_KIOCB */ /* Define if the GNU gettext() function is already present or preinstalled. */ /* #undef HAVE_GETTEXT */ /* iops->get_acl() exists */ /* #undef HAVE_GET_ACL */ /* iops->get_acl() takes rcu */ /* #undef HAVE_GET_ACL_RCU */ /* iops->get_link() cookie */ /* #undef HAVE_GET_LINK_COOKIE */ /* iops->get_link() delayed */ /* #undef HAVE_GET_LINK_DELAYED */ /* group_info->gid exists */ /* #undef HAVE_GROUP_INFO_GID */ /* has_capability() is available */ /* #undef HAVE_HAS_CAPABILITY */ /* Define if you have the iconv() function and it works. */ #define HAVE_ICONV 1 /* Define if compiler supports -Winfinite-recursion */ /* #undef HAVE_INFINITE_RECURSION */ /* yes */ /* #undef HAVE_INODE_LOCK_SHARED */ /* inode_owner_or_capable() exists */ /* #undef HAVE_INODE_OWNER_OR_CAPABLE */ /* inode_owner_or_capable() takes user_ns */ /* #undef HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED */ /* inode_set_flags() exists */ /* #undef HAVE_INODE_SET_FLAGS */ /* inode_set_iversion() exists */ /* #undef HAVE_INODE_SET_IVERSION */ /* inode->i_*time's are timespec64 */ /* #undef HAVE_INODE_TIMESPEC64_TIMES */ /* timestamp_truncate() exists */ /* #undef HAVE_INODE_TIMESTAMP_TRUNCATE */ /* Define to 1 if you have the header file. */ #define HAVE_INTTYPES_H 1 /* in_compat_syscall() is available */ /* #undef HAVE_IN_COMPAT_SYSCALL */ /* iops->create() takes struct user_namespace* */ /* #undef HAVE_IOPS_CREATE_USERNS */ /* iops->mkdir() takes struct user_namespace* */ /* #undef HAVE_IOPS_MKDIR_USERNS */ /* iops->mknod() takes struct user_namespace* */ /* #undef HAVE_IOPS_MKNOD_USERNS */ /* iops->permission() takes struct user_namespace* */ /* #undef HAVE_IOPS_PERMISSION_USERNS */ /* iops->rename() takes struct user_namespace* */ /* #undef HAVE_IOPS_RENAME_USERNS */ /* iops->symlink() takes struct user_namespace* */ /* #undef HAVE_IOPS_SYMLINK_USERNS */ /* iov_iter_advance() is available */ /* #undef HAVE_IOV_ITER_ADVANCE */ /* iov_iter_count() is available */ /* #undef HAVE_IOV_ITER_COUNT */ /* iov_iter_fault_in_readable() is available */ /* #undef HAVE_IOV_ITER_FAULT_IN_READABLE */ /* iov_iter_revert() is available */ /* #undef HAVE_IOV_ITER_REVERT */ /* iov_iter_type() is available */ /* #undef HAVE_IOV_ITER_TYPE */ /* iov_iter types are available */ /* #undef HAVE_IOV_ITER_TYPES */ /* yes */ /* #undef HAVE_IO_SCHEDULE_TIMEOUT */ /* Define to 1 if you have the `issetugid' function. */ #define HAVE_ISSETUGID 1 /* kernel has kernel_fpu_* functions */ /* #undef HAVE_KERNEL_FPU */ /* kernel has asm/fpu/api.h */ /* #undef HAVE_KERNEL_FPU_API_HEADER */ /* kernel fpu internal */ /* #undef HAVE_KERNEL_FPU_INTERNAL */ /* kernel has asm/fpu/internal.h */ /* #undef HAVE_KERNEL_FPU_INTERNAL_HEADER */ /* uncached_acl_sentinel() exists */ /* #undef HAVE_KERNEL_GET_ACL_HANDLE_CACHE */ /* kernel does stack verification */ /* #undef HAVE_KERNEL_OBJTOOL */ /* kernel has linux/objtool.h */ /* #undef HAVE_KERNEL_OBJTOOL_HEADER */ /* kernel_read() take loff_t pointer */ /* #undef HAVE_KERNEL_READ_PPOS */ /* timer_list.function gets a timer_list */ /* #undef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST */ /* struct timer_list has a flags member */ /* #undef HAVE_KERNEL_TIMER_LIST_FLAGS */ /* timer_setup() is available */ /* #undef HAVE_KERNEL_TIMER_SETUP */ /* kernel_write() take loff_t pointer */ /* #undef HAVE_KERNEL_WRITE_PPOS */ /* kmem_cache_create_usercopy() exists */ /* #undef HAVE_KMEM_CACHE_CREATE_USERCOPY */ /* kstrtoul() exists */ /* #undef HAVE_KSTRTOUL */ /* ktime_get_coarse_real_ts64() exists */ /* #undef HAVE_KTIME_GET_COARSE_REAL_TS64 */ /* ktime_get_raw_ts64() exists */ /* #undef HAVE_KTIME_GET_RAW_TS64 */ /* kvmalloc exists */ /* #undef HAVE_KVMALLOC */ /* Define if you have [aio] */ /* #undef HAVE_LIBAIO */ /* Define if you have [blkid] */ /* #undef HAVE_LIBBLKID */ /* Define if you have [crypto] */ #define HAVE_LIBCRYPTO 1 /* Define if you have [tirpc] */ /* #undef HAVE_LIBTIRPC */ /* Define if you have [udev] */ /* #undef HAVE_LIBUDEV */ /* Define if you have [uuid] */ /* #undef HAVE_LIBUUID */ /* linux/blk-cgroup.h exists */ /* #undef HAVE_LINUX_BLK_CGROUP_HEADER */ /* lseek_execute() is available */ /* #undef HAVE_LSEEK_EXECUTE */ /* makedev() is declared in sys/mkdev.h */ /* #undef HAVE_MAKEDEV_IN_MKDEV */ /* makedev() is declared in sys/sysmacros.h */ /* #undef HAVE_MAKEDEV_IN_SYSMACROS */ /* Noting that make_request_fn() returns blk_qc_t */ /* #undef HAVE_MAKE_REQUEST_FN_RET_QC */ /* Noting that make_request_fn() returns void */ /* #undef HAVE_MAKE_REQUEST_FN_RET_VOID */ /* iops->mkdir() takes umode_t */ /* #undef HAVE_MKDIR_UMODE_T */ /* Define to 1 if you have the `mlockall' function. */ #define HAVE_MLOCKALL 1 /* lookup_bdev() wants mode arg */ /* #undef HAVE_MODE_LOOKUP_BDEV */ /* Define if host toolchain supports MOVBE */ #define HAVE_MOVBE 1 /* new_sync_read()/new_sync_write() are available */ /* #undef HAVE_NEW_SYNC_READ */ /* folio_wait_bit() exists */ /* #undef HAVE_PAGEMAP_FOLIO_WAIT_BIT */ /* iops->getattr() takes a path */ /* #undef HAVE_PATH_IOPS_GETATTR */ /* Define if host toolchain supports PCLMULQDQ */ #define HAVE_PCLMULQDQ 1 /* percpu_counter_add_batch() is defined */ /* #undef HAVE_PERCPU_COUNTER_ADD_BATCH */ /* percpu_counter_init() wants gfp_t */ /* #undef HAVE_PERCPU_COUNTER_INIT_WITH_GFP */ /* posix_acl_chmod() exists */ /* #undef HAVE_POSIX_ACL_CHMOD */ /* posix_acl_from_xattr() needs user_ns */ /* #undef HAVE_POSIX_ACL_FROM_XATTR_USERNS */ /* posix_acl_release() is available */ /* #undef HAVE_POSIX_ACL_RELEASE */ /* posix_acl_release() is GPL-only */ /* #undef HAVE_POSIX_ACL_RELEASE_GPL_ONLY */ /* posix_acl_valid() wants user namespace */ /* #undef HAVE_POSIX_ACL_VALID_WITH_NS */ /* proc_ops structure exists */ /* #undef HAVE_PROC_OPS_STRUCT */ /* iops->put_link() cookie */ /* #undef HAVE_PUT_LINK_COOKIE */ /* iops->put_link() delayed */ /* #undef HAVE_PUT_LINK_DELAYED */ /* iops->put_link() nameidata */ /* #undef HAVE_PUT_LINK_NAMEIDATA */ /* If available, contains the Python version number currently in use. */ #define HAVE_PYTHON "3.7" /* qat is enabled and existed */ /* #undef HAVE_QAT */ /* register_shrinker is vararg */ /* #undef HAVE_REGISTER_SHRINKER_VARARG */ /* iops->rename() wants flags */ /* #undef HAVE_RENAME_WANTS_FLAGS */ /* REQ_DISCARD is defined */ /* #undef HAVE_REQ_DISCARD */ /* REQ_FLUSH is defined */ /* #undef HAVE_REQ_FLUSH */ /* REQ_OP_DISCARD is defined */ /* #undef HAVE_REQ_OP_DISCARD */ /* REQ_OP_FLUSH is defined */ /* #undef HAVE_REQ_OP_FLUSH */ /* REQ_OP_SECURE_ERASE is defined */ /* #undef HAVE_REQ_OP_SECURE_ERASE */ /* REQ_PREFLUSH is defined */ /* #undef HAVE_REQ_PREFLUSH */ /* revalidate_disk() is available */ /* #undef HAVE_REVALIDATE_DISK */ /* revalidate_disk_size() is available */ /* #undef HAVE_REVALIDATE_DISK_SIZE */ /* struct rw_semaphore has member activity */ /* #undef HAVE_RWSEM_ACTIVITY */ /* struct rw_semaphore has atomic_long_t member count */ /* #undef HAVE_RWSEM_ATOMIC_LONG_COUNT */ /* linux/sched/signal.h exists */ /* #undef HAVE_SCHED_SIGNAL_HEADER */ /* Define to 1 if you have the header file. */ #define HAVE_SECURITY_PAM_MODULES_H 1 /* setattr_prepare() is available, doesn't accept user_namespace */ /* #undef HAVE_SETATTR_PREPARE_NO_USERNS */ /* setattr_prepare() accepts user_namespace */ /* #undef HAVE_SETATTR_PREPARE_USERNS */ /* iops->set_acl() exists, takes 3 args */ /* #undef HAVE_SET_ACL */ /* iops->set_acl() takes 4 args */ /* #undef HAVE_SET_ACL_USERNS */ /* set_cached_acl() is usable */ /* #undef HAVE_SET_CACHED_ACL_USABLE */ /* set_special_state() exists */ /* #undef HAVE_SET_SPECIAL_STATE */ /* struct shrink_control exists */ /* #undef HAVE_SHRINK_CONTROL_STRUCT */ /* kernel_siginfo_t exists */ /* #undef HAVE_SIGINFO */ /* signal_stop() exists */ /* #undef HAVE_SIGNAL_STOP */ /* new shrinker callback wants 2 args */ /* #undef HAVE_SINGLE_SHRINKER_CALLBACK */ /* cs->count_objects exists */ /* #undef HAVE_SPLIT_SHRINKER_CALLBACK */ #if defined(__amd64__) || defined(__i386__) /* Define if host toolchain supports SSE */ #define HAVE_SSE 1 /* Define if host toolchain supports SSE2 */ #define HAVE_SSE2 1 /* Define if host toolchain supports SSE3 */ #define HAVE_SSE3 1 /* Define if host toolchain supports SSE4.1 */ #define HAVE_SSE4_1 1 /* Define if host toolchain supports SSE4.2 */ #define HAVE_SSE4_2 1 /* Define if host toolchain supports SSSE3 */ #define HAVE_SSSE3 1 #endif /* STACK_FRAME_NON_STANDARD is defined */ /* #undef HAVE_STACK_FRAME_NON_STANDARD */ /* standalone exists */ /* #undef HAVE_STANDALONE_LINUX_STDARG */ /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDIO_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRINGS_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRING_H 1 /* Define to 1 if you have the `strlcat' function. */ #define HAVE_STRLCAT 1 /* Define to 1 if you have the `strlcpy' function. */ #define HAVE_STRLCPY 1 /* submit_bio is member of struct block_device_operations */ /* #undef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ /* super_setup_bdi_name() exits */ /* #undef HAVE_SUPER_SETUP_BDI_NAME */ /* super_block->s_user_ns exists */ /* #undef HAVE_SUPER_USER_NS */ /* struct kobj_type has default_groups */ /* #undef HAVE_SYSFS_DEFAULT_GROUPS */ /* Define to 1 if you have the header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TYPES_H 1 /* i_op->tmpfile() exists */ /* #undef HAVE_TMPFILE */ /* i_op->tmpfile() has userns */ /* #undef HAVE_TMPFILE_USERNS */ /* totalhigh_pages() exists */ /* #undef HAVE_TOTALHIGH_PAGES */ /* kernel has totalram_pages() */ /* #undef HAVE_TOTALRAM_PAGES_FUNC */ /* Define to 1 if you have the `udev_device_get_is_initialized' function. */ /* #undef HAVE_UDEV_DEVICE_GET_IS_INITIALIZED */ /* kernel has __kernel_fpu_* functions */ /* #undef HAVE_UNDERSCORE_KERNEL_FPU */ /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 /* iops->getattr() takes struct user_namespace* */ /* #undef HAVE_USERNS_IOPS_GETATTR */ /* user_namespace->ns.inum exists */ /* #undef HAVE_USER_NS_COMMON_INUM */ /* iops->getattr() takes a vfsmount */ /* #undef HAVE_VFSMOUNT_IOPS_GETATTR */ /* aops->direct_IO() uses iovec */ /* #undef HAVE_VFS_DIRECT_IO_IOVEC */ /* aops->direct_IO() uses iov_iter without rw */ /* #undef HAVE_VFS_DIRECT_IO_ITER */ /* aops->direct_IO() uses iov_iter with offset */ /* #undef HAVE_VFS_DIRECT_IO_ITER_OFFSET */ /* aops->direct_IO() uses iov_iter with rw and offset */ /* #undef HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET */ /* filemap_dirty_folio exists */ /* #undef HAVE_VFS_FILEMAP_DIRTY_FOLIO */ /* All required iov_iter interfaces are available */ /* #undef HAVE_VFS_IOV_ITER */ /* fops->iterate() is available */ /* #undef HAVE_VFS_ITERATE */ /* fops->iterate_shared() is available */ /* #undef HAVE_VFS_ITERATE_SHARED */ /* fops->readdir() is available */ /* #undef HAVE_VFS_READDIR */ /* address_space_operations->readpages exists */ /* #undef HAVE_VFS_READPAGES */ /* read_folio exists */ /* #undef HAVE_VFS_READ_FOLIO */ /* fops->read/write_iter() are available */ /* #undef HAVE_VFS_RW_ITERATE */ /* __set_page_dirty_nobuffers exists */ /* #undef HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS */ /* __vmalloc page flags exists */ /* #undef HAVE_VMALLOC_PAGE_KERNEL */ /* yes */ /* #undef HAVE_WAIT_ON_BIT_ACTION */ /* wait_queue_entry_t exists */ /* #undef HAVE_WAIT_QUEUE_ENTRY_T */ /* wq_head->head and wq_entry->entry exist */ /* #undef HAVE_WAIT_QUEUE_HEAD_ENTRY */ /* xattr_handler->get() wants dentry */ /* #undef HAVE_XATTR_GET_DENTRY */ /* xattr_handler->get() wants both dentry and inode */ /* #undef HAVE_XATTR_GET_DENTRY_INODE */ /* xattr_handler->get() wants xattr_handler */ /* #undef HAVE_XATTR_GET_HANDLER */ /* xattr_handler has name */ /* #undef HAVE_XATTR_HANDLER_NAME */ /* xattr_handler->list() wants dentry */ /* #undef HAVE_XATTR_LIST_DENTRY */ /* xattr_handler->list() wants xattr_handler */ /* #undef HAVE_XATTR_LIST_HANDLER */ /* xattr_handler->list() wants simple */ /* #undef HAVE_XATTR_LIST_SIMPLE */ /* xattr_handler->set() wants dentry */ /* #undef HAVE_XATTR_SET_DENTRY */ /* xattr_handler->set() wants both dentry and inode */ /* #undef HAVE_XATTR_SET_DENTRY_INODE */ /* xattr_handler->set() wants xattr_handler */ /* #undef HAVE_XATTR_SET_HANDLER */ /* xattr_handler->set() takes user_namespace */ /* #undef HAVE_XATTR_SET_USERNS */ /* Define if host toolchain supports XSAVE */ #define HAVE_XSAVE 1 /* Define if host toolchain supports XSAVEOPT */ #define HAVE_XSAVEOPT 1 /* Define if host toolchain supports XSAVES */ #define HAVE_XSAVES 1 /* ZERO_PAGE() is GPL-only */ /* #undef HAVE_ZERO_PAGE_GPL_ONLY */ /* Define if you have [z] */ #define HAVE_ZLIB 1 /* __posix_acl_chmod() exists */ /* #undef HAVE___POSIX_ACL_CHMOD */ /* kernel exports FPU functions */ /* #undef KERNEL_EXPORTS_X86_FPU */ /* TBD: fetch(3) support */ #if 0 /* whether the chosen libfetch is to be loaded at run-time */ #define LIBFETCH_DYNAMIC 1 /* libfetch is fetch(3) */ #define LIBFETCH_IS_FETCH 1 /* libfetch is libcurl */ #define LIBFETCH_IS_LIBCURL 0 /* soname of chosen libfetch */ #define LIBFETCH_SONAME "libfetch.so.6" #endif /* Define to the sub-directory where libtool stores uninstalled libraries. */ #define LT_OBJDIR ".libs/" /* make_request_fn() return type */ /* #undef MAKE_REQUEST_FN_RET */ /* hardened module_param_call */ /* #undef MODULE_PARAM_CALL_CONST */ /* struct shrink_control has nid */ /* #undef SHRINK_CONTROL_HAS_NID */ /* using complete_and_exit() instead */ /* #undef SPL_KTHREAD_COMPLETE_AND_EXIT */ /* Defined for legacy compatibility. */ #define SPL_META_ALIAS ZFS_META_ALIAS /* Defined for legacy compatibility. */ #define SPL_META_RELEASE ZFS_META_RELEASE /* Defined for legacy compatibility. */ #define SPL_META_VERSION ZFS_META_VERSION /* pde_data() is PDE_DATA() */ /* #undef SPL_PDE_DATA */ /* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ #define SYSTEM_FREEBSD 1 /* True if ZFS is to be compiled for a Linux system */ /* #undef SYSTEM_LINUX */ /* Version number of package */ /* #undef ZFS_DEBUG */ /* /dev/zfs minor */ /* #undef ZFS_DEVICE_MINOR */ /* enum node_stat_item contains NR_FILE_PAGES */ /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_FILE_PAGES */ /* enum node_stat_item contains NR_INACTIVE_ANON */ /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_ANON */ /* enum node_stat_item contains NR_INACTIVE_FILE */ /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_FILE */ /* enum zone_stat_item contains NR_FILE_PAGES */ /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_FILE_PAGES */ /* enum zone_stat_item contains NR_INACTIVE_ANON */ /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_INACTIVE_ANON */ /* enum zone_stat_item contains NR_INACTIVE_FILE */ /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_INACTIVE_FILE */ /* GENHD_FL_EXT_DEVT flag is not available */ /* #undef ZFS_GENHD_FL_EXT_DEVT */ /* GENHD_FL_NO_PART_SCAN flag is available */ /* #undef ZFS_GENHD_FL_NO_PART */ /* global_node_page_state() exists */ /* #undef ZFS_GLOBAL_NODE_PAGE_STATE */ /* global_zone_page_state() exists */ /* #undef ZFS_GLOBAL_ZONE_PAGE_STATE */ /* Define to 1 if GPL-only symbols can be used */ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gb3d0568cf" +#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_ga582d5299" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" /* Define the project release date. */ /* #undef ZFS_META_DATA */ /* Define the maximum compatible kernel version. */ #define ZFS_META_KVER_MAX "5.19" /* Define the minimum compatible kernel version. */ #define ZFS_META_KVER_MIN "3.10" /* Define the project license. */ #define ZFS_META_LICENSE "CDDL" /* Define the libtool library 'age' version information. */ /* #undef ZFS_META_LT_AGE */ /* Define the libtool library 'current' version information. */ /* #undef ZFS_META_LT_CURRENT */ /* Define the libtool library 'revision' version information. */ /* #undef ZFS_META_LT_REVISION */ /* Define the project name. */ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "FreeBSD_gb3d0568cf" +#define ZFS_META_RELEASE "FreeBSD_ga582d5299" /* Define the project version. */ #define ZFS_META_VERSION "2.1.99" /* count is located in percpu_ref.data */ /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */ diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index e883b2a52eef..fcf064562d07 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.1.99-1329-gb3d0568cf" +#define ZFS_META_GITREV "zfs-2.1.99-1335-ga582d5299"