diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 8dd75e0bb4da..921edcc63341 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -1,1298 +1,1337 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2016, 2017, Intel Corporation. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. */ /* * ZFS syseventd module. * * file origin: openzfs/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c * * The purpose of this module is to identify when devices are added to the * system, and appropriately online or replace the affected vdevs. * * When a device is added to the system: * * 1. Search for any vdevs whose devid matches that of the newly added * device. * * 2. If no vdevs are found, then search for any vdevs whose udev path * matches that of the new device. * * 3. If no vdevs match by either method, then ignore the event. * * 4. Attempt to online the device with a flag to indicate that it should * be unspared when resilvering completes. 
If this succeeds, then the * same device was inserted and we should continue normally. * * 5. If the pool does not have the 'autoreplace' property set, attempt to * online the device again without the unspare flag, which will * generate a FMA fault. * * 6. If the pool has the 'autoreplace' property set, and the matching vdev * is a whole disk, then label the new disk and attempt a 'zpool * replace'. * * The module responds to EC_DEV_ADD events. The special ESC_ZFS_VDEV_CHECK * event indicates that a device failed to open during pool load, but the * autoreplace property was set. In this case, we deferred the associated * FMA fault until our module had a chance to process the autoreplace logic. * If the device could not be replaced, then the second online attempt will * trigger the FMA fault that we skipped earlier. * * On Linux udev provides a disk insert for both the disk and the partition. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_agents.h" #include "../zed_log.h" #define DEV_BYID_PATH "/dev/disk/by-id/" #define DEV_BYPATH_PATH "/dev/disk/by-path/" #define DEV_BYVDEV_PATH "/dev/disk/by-vdev/" typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t); libzfs_handle_t *g_zfshdl; list_t g_pool_list; /* list of unavailable pools at initialization */ list_t g_device_list; /* list of disks with asynchronous label request */ tpool_t *g_tpool; boolean_t g_enumeration_done; pthread_t g_zfs_tid; /* zfs_enum_pools() thread */ typedef struct unavailpool { zpool_handle_t *uap_zhp; list_node_t uap_node; } unavailpool_t; typedef struct pendingdev { char pd_physpath[128]; list_node_t pd_node; } pendingdev_t; static int zfs_toplevel_state(zpool_handle_t *zhp) { nvlist_t *nvroot; vdev_stat_t *vs; unsigned int c; verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); return (vs->vs_state); } static int zfs_unavail_pool(zpool_handle_t *zhp, void *data) { zed_log_msg(LOG_INFO, "zfs_unavail_pool: examining '%s' (state %d)", zpool_get_name(zhp), (int)zfs_toplevel_state(zhp)); if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) { unavailpool_t *uap; uap = malloc(sizeof (unavailpool_t)); uap->uap_zhp = zhp; list_insert_tail((list_t *)data, uap); } else { zpool_close(zhp); } return (0); } +/* + * Write an array of strings to the zed log + */ +static void lines_to_zed_log_msg(char **lines, int lines_cnt) +{ + int i; + for (i = 0; i < lines_cnt; i++) { + zed_log_msg(LOG_INFO, "%s", lines[i]); + } +} + /* * Two stage replace on Linux * since we get disk notifications * we can wait for partitioned disk slice to show up! * * First stage tags the disk, initiates async partitioning, and returns * Second stage finds the tag and proceeds to ZFS labeling/replace * * disk-add --> label-disk + tag-disk --> partition-add --> zpool_vdev_attach * * 1. physical match with no fs, no partition * tag it top, partition disk * * 2. physical match again, see partition and tag * */ /* * The device associated with the given vdev (either by devid or physical path) * has been added to the system. If 'isdisk' is set, then we only attempt a * replacement if it's a whole disk. This also implies that we should label the * disk first. * * First, we attempt to online the device (making sure to undo any spare * operation when finished). If this succeeds, then we're done. If it fails, * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened, * but that the label was not what we expected. If the 'autoreplace' property * is enabled, then we relabel the disk (if specified), and attempt a 'zpool * replace'. 
If the online is successful, but the new state is something else * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of * race, and we should avoid attempting to relabel the disk. * * Also can arrive here from a ESC_ZFS_VDEV_CHECK event */ static void zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) { char *path; vdev_state_t newstate; nvlist_t *nvroot, *newvd; pendingdev_t *device; uint64_t wholedisk = 0ULL; uint64_t offline = 0ULL, faulted = 0ULL; uint64_t guid = 0ULL; uint64_t is_spare = 0; char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL; char rawpath[PATH_MAX], fullpath[PATH_MAX]; char devpath[PATH_MAX]; int ret; int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE; boolean_t is_sd = B_FALSE; boolean_t is_mpath_wholedisk = B_FALSE; uint_t c; vdev_stat_t *vs; + char **lines = NULL; + int lines_cnt = 0; if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0) return; /* Skip healthy disks */ verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); if (vs->vs_state == VDEV_STATE_HEALTHY) { zed_log_msg(LOG_INFO, "%s: %s is already healthy, skip it.", __func__, path); return; } (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath); (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &enc_sysfs_path); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_IS_SPARE, &is_spare); /* * Special case: * * We've seen times where a disk won't have a ZPOOL_CONFIG_PHYS_PATH * entry in their config. 
For example, on this force-faulted disk: * * children[0]: * type: 'disk' * id: 0 * guid: 14309659774640089719 * path: '/dev/disk/by-vdev/L28' * whole_disk: 0 * DTL: 654 * create_txg: 4 * com.delphix:vdev_zap_leaf: 1161 * faulted: 1 * aux_state: 'external' * children[1]: * type: 'disk' * id: 1 * guid: 16002508084177980912 * path: '/dev/disk/by-vdev/L29' * devid: 'dm-uuid-mpath-35000c500a61d68a3' * phys_path: 'L29' * vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32' * whole_disk: 0 * DTL: 1028 * create_txg: 4 * com.delphix:vdev_zap_leaf: 131 * * If the disk's path is a /dev/disk/by-vdev/ path, then we can infer * the ZPOOL_CONFIG_PHYS_PATH from the by-vdev disk name. */ if (physpath == NULL && path != NULL) { /* If path begins with "/dev/disk/by-vdev/" ... */ if (strncmp(path, DEV_BYVDEV_PATH, strlen(DEV_BYVDEV_PATH)) == 0) { /* Set physpath to the char after "/dev/disk/by-vdev" */ physpath = &path[strlen(DEV_BYVDEV_PATH)]; } } /* * We don't want to autoreplace offlined disks. However, we do want to * replace force-faulted disks (`zpool offline -f`). Force-faulted * disks have both offline=1 and faulted=1 in the nvlist. */ if (offline && !faulted) { zed_log_msg(LOG_INFO, "%s: %s is offline, skip autoreplace", __func__, path); return; } is_mpath_wholedisk = is_mpath_whole_disk(path); zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'" " %s blank disk, %s mpath blank disk, %s labeled, enc sysfs '%s', " "(guid %llu)", zpool_get_name(zhp), path, physpath ? physpath : "NULL", wholedisk ? "is" : "not", is_mpath_wholedisk? "is" : "not", labeled ? 
"is" : "not", enc_sysfs_path, (long long unsigned int)guid); /* * The VDEV guid is preferred for identification (gets passed in path) */ if (guid != 0) { (void) snprintf(fullpath, sizeof (fullpath), "%llu", (long long unsigned int)guid); } else { /* * otherwise use path sans partition suffix for whole disks */ (void) strlcpy(fullpath, path, sizeof (fullpath)); if (wholedisk) { char *spath = zfs_strip_partition(fullpath); if (!spath) { zed_log_msg(LOG_INFO, "%s: Can't alloc", __func__); return; } (void) strlcpy(fullpath, spath, sizeof (fullpath)); free(spath); } } if (is_spare) online_flag |= ZFS_ONLINE_SPARE; /* * Attempt to online the device. */ if (zpool_vdev_online(zhp, fullpath, online_flag, &newstate) == 0 && (newstate == VDEV_STATE_HEALTHY || newstate == VDEV_STATE_DEGRADED)) { zed_log_msg(LOG_INFO, " zpool_vdev_online: vdev '%s' ('%s') is " "%s", fullpath, physpath, (newstate == VDEV_STATE_HEALTHY) ? "HEALTHY" : "DEGRADED"); return; } /* * vdev_id alias rule for using scsi_debug devices (FMA automated * testing) */ if (physpath != NULL && strcmp("scsidebug", physpath) == 0) is_sd = B_TRUE; /* * If the pool doesn't have the autoreplace property set, then use * vdev online to trigger a FMA fault by posting an ereport. */ if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) || !(wholedisk || is_mpath_wholedisk) || (physpath == NULL)) { (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT, &newstate); zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or " "not a blank disk for '%s' ('%s')", fullpath, physpath); return; } /* * Convert physical path into its current device node. Rawpath * needs to be /dev/disk/by-vdev for a scsi_debug device since * /dev/disk/by-path will not be present. */ (void) snprintf(rawpath, sizeof (rawpath), "%s%s", is_sd ? 
DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath); if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) { zed_log_msg(LOG_INFO, " realpath: %s failed (%s)", rawpath, strerror(errno)); (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT, &newstate); zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)", fullpath, libzfs_error_description(g_zfshdl)); return; } /* Only autoreplace bad disks */ if ((vs->vs_state != VDEV_STATE_DEGRADED) && (vs->vs_state != VDEV_STATE_FAULTED) && (vs->vs_state != VDEV_STATE_CANT_OPEN)) { zed_log_msg(LOG_INFO, " not autoreplacing since disk isn't in " "a bad state (currently %d)", vs->vs_state); return; } nvlist_lookup_string(vdev, "new_devid", &new_devid); if (is_mpath_wholedisk) { /* Don't label device mapper or multipath disks. */ + zed_log_msg(LOG_INFO, + " it's a multipath wholedisk, don't label"); + if (zpool_prepare_disk(zhp, vdev, "autoreplace", &lines, + &lines_cnt) != 0) { + zed_log_msg(LOG_INFO, + " zpool_prepare_disk: could not " + "prepare '%s' (%s)", fullpath, + libzfs_error_description(g_zfshdl)); + if (lines_cnt > 0) { + zed_log_msg(LOG_INFO, + " zfs_prepare_disk output:"); + lines_to_zed_log_msg(lines, lines_cnt); + } + libzfs_free_str_array(lines, lines_cnt); + return; + } } else if (!labeled) { /* * we're auto-replacing a raw disk, so label it first */ char *leafname; /* * If this is a request to label a whole disk, then attempt to * write out the label. Before we can label the disk, we need * to map the physical string that was matched on to the under * lying device node. * * If any part of this process fails, then do a force online * to trigger a ZFS fault for the device (and any hot spare * replacement). */ leafname = strrchr(devpath, '/') + 1; /* * If this is a request to label a whole disk, then attempt to * write out the label. 
*/ - if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) { - zed_log_msg(LOG_INFO, " zpool_label_disk: could not " + if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname, + vdev, "autoreplace", &lines, &lines_cnt) != 0) { + zed_log_msg(LOG_INFO, + " zpool_prepare_and_label_disk: could not " "label '%s' (%s)", leafname, libzfs_error_description(g_zfshdl)); + if (lines_cnt > 0) { + zed_log_msg(LOG_INFO, + " zfs_prepare_disk output:"); + lines_to_zed_log_msg(lines, lines_cnt); + } + libzfs_free_str_array(lines, lines_cnt); (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT, &newstate); return; } /* * The disk labeling is asynchronous on Linux. Just record * this label request and return as there will be another * disk add event for the partition after the labeling is * completed. */ device = malloc(sizeof (pendingdev_t)); (void) strlcpy(device->pd_physpath, physpath, sizeof (device->pd_physpath)); list_insert_tail(&g_device_list, device); zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)", leafname, (u_longlong_t)guid); return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */ } else /* labeled */ { boolean_t found = B_FALSE; /* * match up with request above to label the disk */ for (device = list_head(&g_device_list); device != NULL; device = list_next(&g_device_list, device)) { if (strcmp(physpath, device->pd_physpath) == 0) { list_remove(&g_device_list, device); free(device); found = B_TRUE; break; } zed_log_msg(LOG_INFO, "zpool_label_disk: %s != %s", physpath, device->pd_physpath); } if (!found) { /* unexpected partition slice encountered */ zed_log_msg(LOG_INFO, "labeled disk %s unexpected here", fullpath); (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT, &newstate); return; } zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)", physpath, (u_longlong_t)guid); (void) snprintf(devpath, sizeof (devpath), "%s%s", DEV_BYID_PATH, new_devid); } + libzfs_free_str_array(lines, lines_cnt); + /* * Construct the root vdev to 
pass to zpool_vdev_attach(). While adding * the entire vdev structure is harmless, we construct a reduced set of * path/physpath/wholedisk to keep it simple. */ if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) { zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory"); return; } if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory"); nvlist_free(nvroot); return; } if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 || nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 || (physpath != NULL && nvlist_add_string(newvd, ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) || (enc_sysfs_path != NULL && nvlist_add_string(newvd, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, enc_sysfs_path) != 0) || nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd, 1) != 0) { zed_log_msg(LOG_WARNING, "zfs_mod: unable to add nvlist pairs"); nvlist_free(newvd); nvlist_free(nvroot); return; } nvlist_free(newvd); /* * Wait for udev to verify the links exist, then auto-replace * the leaf disk at same physical location. */ if (zpool_label_disk_wait(path, 3000) != 0) { zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement " "disk %s is missing", path); nvlist_free(nvroot); return; } /* * Prefer sequential resilvering when supported (mirrors and dRAID), * otherwise fallback to a traditional healing resilver. */ ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE, B_TRUE); if (ret != 0) { ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE, B_FALSE); } zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)", fullpath, path, (ret == 0) ? "no errors" : libzfs_error_description(g_zfshdl)); nvlist_free(nvroot); } /* * Utility functions to find a vdev matching given criteria. 
*/ typedef struct dev_data { const char *dd_compare; const char *dd_prop; zfs_process_func_t dd_func; boolean_t dd_found; boolean_t dd_islabeled; uint64_t dd_pool_guid; uint64_t dd_vdev_guid; uint64_t dd_new_vdev_guid; const char *dd_new_devid; uint64_t dd_num_spares; } dev_data_t; static void zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) { dev_data_t *dp = data; char *path = NULL; uint_t c, children; nvlist_t **child; uint64_t guid = 0; uint64_t isspare = 0; /* * First iterate over any children. */ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) zfs_iter_vdev(zhp, child[c], data); } /* * Iterate over any spares and cache devices */ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { for (c = 0; c < children; c++) zfs_iter_vdev(zhp, child[c], data); } if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { for (c = 0; c < children; c++) zfs_iter_vdev(zhp, child[c], data); } /* once a vdev was matched and processed there is nothing left to do */ if (dp->dd_found && dp->dd_num_spares == 0) return; (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &guid); /* * Match by GUID if available otherwise fallback to devid or physical */ if (dp->dd_vdev_guid != 0) { if (guid != dp->dd_vdev_guid) return; zed_log_msg(LOG_INFO, " zfs_iter_vdev: matched on %llu", guid); dp->dd_found = B_TRUE; } else if (dp->dd_compare != NULL) { /* * NOTE: On Linux there is an event for partition, so unlike * illumos, substring matching is not required to accommodate * the partition suffix. An exact match will be present in * the dp->dd_compare value. * If the attached disk already contains a vdev GUID, it means * the disk is not clean. In such a scenario, the physical path * would be a match that makes the disk faulted when trying to * online it. 
So, we would only want to proceed if either GUID * matches with the last attached disk or the disk is in clean * state. */ if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 || strcmp(dp->dd_compare, path) != 0) { return; } if (dp->dd_new_vdev_guid != 0 && dp->dd_new_vdev_guid != guid) { zed_log_msg(LOG_INFO, " %s: no match (GUID:%llu" " != vdev GUID:%llu)", __func__, dp->dd_new_vdev_guid, guid); return; } zed_log_msg(LOG_INFO, " zfs_iter_vdev: matched %s on %s", dp->dd_prop, path); dp->dd_found = B_TRUE; /* pass the new devid for use by replacing code */ if (dp->dd_new_devid != NULL) { (void) nvlist_add_string(nvl, "new_devid", dp->dd_new_devid); } } if (dp->dd_found == B_TRUE && nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare) dp->dd_num_spares++; (dp->dd_func)(zhp, nvl, dp->dd_islabeled); } static void zfs_enable_ds(void *arg) { unavailpool_t *pool = (unavailpool_t *)arg; (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0); zpool_close(pool->uap_zhp); free(pool); } static int zfs_iter_pool(zpool_handle_t *zhp, void *data) { nvlist_t *config, *nvl; dev_data_t *dp = data; uint64_t pool_guid; unavailpool_t *pool; zed_log_msg(LOG_INFO, "zfs_iter_pool: evaluating vdevs on %s (by %s)", zpool_get_name(zhp), dp->dd_vdev_guid ? 
"GUID" : dp->dd_prop); /* * For each vdev in this pool, look for a match to apply dd_func */ if ((config = zpool_get_config(zhp, NULL)) != NULL) { if (dp->dd_pool_guid == 0 || (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) { (void) nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl); zfs_iter_vdev(zhp, nvl, data); } } else { zed_log_msg(LOG_INFO, "%s: no config\n", __func__); } /* * if this pool was originally unavailable, * then enable its datasets asynchronously */ if (g_enumeration_done) { for (pool = list_head(&g_pool_list); pool != NULL; pool = list_next(&g_pool_list, pool)) { if (strcmp(zpool_get_name(zhp), zpool_get_name(pool->uap_zhp))) continue; if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) { list_remove(&g_pool_list, pool); (void) tpool_dispatch(g_tpool, zfs_enable_ds, pool); break; } } } zpool_close(zhp); /* cease iteration after a match */ return (dp->dd_found && dp->dd_num_spares == 0); } /* * Given a physical device location, iterate over all * (pool, vdev) pairs which correspond to that location. */ static boolean_t devphys_iter(const char *physical, const char *devid, zfs_process_func_t func, boolean_t is_slice, uint64_t new_vdev_guid) { dev_data_t data = { 0 }; data.dd_compare = physical; data.dd_func = func; data.dd_prop = ZPOOL_CONFIG_PHYS_PATH; data.dd_found = B_FALSE; data.dd_islabeled = is_slice; data.dd_new_devid = devid; /* used by auto replace code */ data.dd_new_vdev_guid = new_vdev_guid; (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data); return (data.dd_found); } /* * Given a device identifier, find any vdevs with a matching by-vdev * path. Normally we shouldn't need this as the comparison would be * made earlier in the devphys_iter(). For example, if we were replacing * /dev/disk/by-vdev/L28, normally devphys_iter() would match the * ZPOOL_CONFIG_PHYS_PATH of "L28" from the old disk config to "L28" * of the new disk config. 
However, we've seen cases where * ZPOOL_CONFIG_PHYS_PATH was not in the config for the old disk. Here's * an example of a real 2-disk mirror pool where one disk was force * faulted: * * com.delphix:vdev_zap_top: 129 * children[0]: * type: 'disk' * id: 0 * guid: 14309659774640089719 * path: '/dev/disk/by-vdev/L28' * whole_disk: 0 * DTL: 654 * create_txg: 4 * com.delphix:vdev_zap_leaf: 1161 * faulted: 1 * aux_state: 'external' * children[1]: * type: 'disk' * id: 1 * guid: 16002508084177980912 * path: '/dev/disk/by-vdev/L29' * devid: 'dm-uuid-mpath-35000c500a61d68a3' * phys_path: 'L29' * vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32' * whole_disk: 0 * DTL: 1028 * create_txg: 4 * com.delphix:vdev_zap_leaf: 131 * * So in the case above, the only thing we could compare is the path. * * We can do this because we assume by-vdev paths are authoritative as physical * paths. We could not assume this for normal paths like /dev/sda since the * physical location /dev/sda points to could change over time. */ static boolean_t by_vdev_path_iter(const char *by_vdev_path, const char *devid, zfs_process_func_t func, boolean_t is_slice) { dev_data_t data = { 0 }; data.dd_compare = by_vdev_path; data.dd_func = func; data.dd_prop = ZPOOL_CONFIG_PATH; data.dd_found = B_FALSE; data.dd_islabeled = is_slice; data.dd_new_devid = devid; if (strncmp(by_vdev_path, DEV_BYVDEV_PATH, strlen(DEV_BYVDEV_PATH)) != 0) { /* by_vdev_path doesn't start with "/dev/disk/by-vdev/" */ return (B_FALSE); } (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data); return (data.dd_found); } /* * Given a device identifier, find any vdevs with a matching devid. * On Linux we can match devid directly which is always a whole disk. 
*/ static boolean_t devid_iter(const char *devid, zfs_process_func_t func, boolean_t is_slice) { dev_data_t data = { 0 }; data.dd_compare = devid; data.dd_func = func; data.dd_prop = ZPOOL_CONFIG_DEVID; data.dd_found = B_FALSE; data.dd_islabeled = is_slice; data.dd_new_devid = devid; (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data); return (data.dd_found); } /* * Given a device guid, find any vdevs with a matching guid. */ static boolean_t guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid, zfs_process_func_t func, boolean_t is_slice) { dev_data_t data = { 0 }; data.dd_func = func; data.dd_found = B_FALSE; data.dd_pool_guid = pool_guid; data.dd_vdev_guid = vdev_guid; data.dd_islabeled = is_slice; data.dd_new_devid = devid; (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data); return (data.dd_found); } /* * Handle a EC_DEV_ADD.ESC_DISK event. * * illumos * Expects: DEV_PHYS_PATH string in schema * Matches: vdev's ZPOOL_CONFIG_PHYS_PATH or ZPOOL_CONFIG_DEVID * * path: '/dev/dsk/c0t1d0s0' (persistent) * devid: 'id1,sd@SATA_____Hitachi_HDS72101______JP2940HZ3H74MC/a' * phys_path: '/pci@0,0/pci103c,1609@11/disk@1,0:a' * * linux * provides: DEV_PHYS_PATH and DEV_IDENTIFIER strings in schema * Matches: vdev's ZPOOL_CONFIG_PHYS_PATH or ZPOOL_CONFIG_DEVID * * path: '/dev/sdc1' (not persistent) * devid: 'ata-SAMSUNG_HD204UI_S2HGJD2Z805891-part1' * phys_path: 'pci-0000:04:00.0-sas-0x4433221106000000-lun-0' */ static int zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi) { char *devpath = NULL, *devid = NULL; uint64_t pool_guid = 0, vdev_guid = 0; boolean_t is_slice; /* * Expecting a devid string and an optional physical location and guid */ if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0) { zed_log_msg(LOG_INFO, "%s: no dev identifier\n", __func__); return (-1); } (void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath); (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid); (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid); 
is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0); zed_log_msg(LOG_INFO, "zfs_deliver_add: adding %s (%s) (is_slice %d)", devid, devpath ? devpath : "NULL", is_slice); /* * Iterate over all vdevs looking for a match in the following order: * 1. ZPOOL_CONFIG_DEVID (identifies the unique disk) * 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location). * 3. ZPOOL_CONFIG_GUID (identifies unique vdev). * 4. ZPOOL_CONFIG_PATH for /dev/disk/by-vdev devices only (since * by-vdev paths represent physical paths). */ if (devid_iter(devid, zfs_process_add, is_slice)) return (0); if (devpath != NULL && devphys_iter(devpath, devid, zfs_process_add, is_slice, vdev_guid)) return (0); if (vdev_guid != 0) (void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add, is_slice); if (devpath != NULL) { /* Can we match a /dev/disk/by-vdev/ path? */ char by_vdev_path[MAXPATHLEN]; snprintf(by_vdev_path, sizeof (by_vdev_path), "/dev/disk/by-vdev/%s", devpath); if (by_vdev_path_iter(by_vdev_path, devid, zfs_process_add, is_slice)) return (0); } return (0); } /* * Called when we receive a VDEV_CHECK event, which indicates a device could not * be opened during initial pool open, but the autoreplace property was set on * the pool. In this case, we treat it as if it were an add event. */ static int zfs_deliver_check(nvlist_t *nvl) { dev_data_t data = { 0 }; if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &data.dd_pool_guid) != 0 || nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &data.dd_vdev_guid) != 0 || data.dd_vdev_guid == 0) return (0); zed_log_msg(LOG_INFO, "zfs_deliver_check: pool '%llu', vdev %llu", data.dd_pool_guid, data.dd_vdev_guid); data.dd_func = zfs_process_add; (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data); return (0); } /* * Given a path to a vdev, lookup the vdev's physical size from its * config nvlist. * * Returns the vdev's physical size in bytes on success, 0 on error. 
*/
static uint64_t
vdev_size_from_config(zpool_handle_t *zhp, const char *vdev_path)
{
	boolean_t is_spare, is_l2cache, is_log;
	vdev_stat_t *stats = NULL;
	uint_t nstats;
	nvlist_t *vdev_cfg;

	vdev_cfg = zpool_find_vdev(zhp, vdev_path, &is_spare, &is_l2cache,
	    &is_log);
	if (vdev_cfg == NULL)
		return (0);

	/* A vdev config without a stats array is treated as fatal. */
	verify(nvlist_lookup_uint64_array(vdev_cfg, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&stats, &nstats) == 0);

	if (stats != NULL)
		return (stats->vs_pspace);

	zed_log_msg(LOG_INFO, "%s: no nvlist for '%s'", __func__, vdev_path);
	return (0);
}

/*
 * Report whether the vdev found at 'vdev_path' is recorded as a "whole
 * disk" in the pool config. "whole disk" means that ZFS was handed an
 * entire disk at pool creation time, partitioned it itself and has full
 * control over it. A partition carrying wholedisk=1 was therefore created
 * by zfs, while one without it was created externally (for example with
 * fdisk) and then passed to ZFS.
 *
 * Returns the whole disk value (either 0 or 1); 0 is also returned when
 * the vdev cannot be found.
 */
static uint64_t
vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path)
{
	boolean_t is_spare, is_l2cache, is_log;
	uint64_t whole = 0;
	nvlist_t *vdev_cfg;

	vdev_cfg = zpool_find_vdev(zhp, vdev_path, &is_spare, &is_l2cache,
	    &is_log);
	if (vdev_cfg != NULL) {
		/* 'whole' keeps its 0 default when the entry is absent */
		(void) nvlist_lookup_uint64(vdev_cfg, ZPOOL_CONFIG_WHOLE_DISK,
		    &whole);
	}

	return (whole);
}

/*
 * If the device size grew more than 1% then return true.
*/ #define DEVICE_GREW(oldsize, newsize) \ ((newsize > oldsize) && \ ((newsize / (newsize - oldsize)) <= 100)) static int zfsdle_vdev_online(zpool_handle_t *zhp, void *data) { boolean_t avail_spare, l2cache; nvlist_t *udev_nvl = data; nvlist_t *tgt; int error; char *tmp_devname, devname[MAXPATHLEN] = ""; uint64_t guid; if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) { sprintf(devname, "%llu", (u_longlong_t)guid); } else if (nvlist_lookup_string(udev_nvl, DEV_PHYS_PATH, &tmp_devname) == 0) { strlcpy(devname, tmp_devname, MAXPATHLEN); zfs_append_partition(devname, MAXPATHLEN); } else { zed_log_msg(LOG_INFO, "%s: no guid or physpath", __func__); } zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'", devname, zpool_get_name(zhp)); if ((tgt = zpool_find_vdev_by_physpath(zhp, devname, &avail_spare, &l2cache, NULL)) != NULL) { char *path, fullpath[MAXPATHLEN]; uint64_t wholedisk = 0; error = nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &path); if (error) { zpool_close(zhp); return (0); } (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); if (wholedisk) { path = strrchr(path, '/'); if (path != NULL) { path = zfs_strip_partition(path + 1); if (path == NULL) { zpool_close(zhp); return (0); } } else { zpool_close(zhp); return (0); } (void) strlcpy(fullpath, path, sizeof (fullpath)); free(path); /* * We need to reopen the pool associated with this * device so that the kernel can update the size of * the expanded device. When expanding there is no * need to restart the scrub from the beginning. */ boolean_t scrub_restart = B_FALSE; (void) zpool_reopen_one(zhp, &scrub_restart); } else { (void) strlcpy(fullpath, path, sizeof (fullpath)); } if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) { vdev_state_t newstate; if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) { /* * If this disk size has not changed, then * there's no need to do an autoexpand. 
To * check we look at the disk's size in its * config, and compare it to the disk size * that udev is reporting. */ uint64_t udev_size = 0, conf_size = 0, wholedisk = 0, udev_parent_size = 0; /* * Get the size of our disk that udev is * reporting. */ if (nvlist_lookup_uint64(udev_nvl, DEV_SIZE, &udev_size) != 0) { udev_size = 0; } /* * Get the size of our disk's parent device * from udev (where sda1's parent is sda). */ if (nvlist_lookup_uint64(udev_nvl, DEV_PARENT_SIZE, &udev_parent_size) != 0) { udev_parent_size = 0; } conf_size = vdev_size_from_config(zhp, fullpath); wholedisk = vdev_whole_disk_from_config(zhp, fullpath); /* * Only attempt an autoexpand if the vdev size * changed. There are two different cases * to consider. * * 1. wholedisk=1 * If you do a 'zpool create' on a whole disk * (like /dev/sda), then zfs will create * partitions on the disk (like /dev/sda1). In * that case, wholedisk=1 will be set in the * partition's nvlist config. So zed will need * to see if your parent device (/dev/sda) * expanded in size, and if so, then attempt * the autoexpand. * * 2. wholedisk=0 * If you do a 'zpool create' on an existing * partition, or a device that doesn't allow * partitions, then wholedisk=0, and you will * simply need to check if the device itself * expanded in size. */ if (DEVICE_GREW(conf_size, udev_size) || (wholedisk && DEVICE_GREW(conf_size, udev_parent_size))) { error = zpool_vdev_online(zhp, fullpath, 0, &newstate); zed_log_msg(LOG_INFO, "%s: autoexpanding '%s' from %llu" " to %llu bytes in pool '%s': %d", __func__, fullpath, conf_size, MAX(udev_size, udev_parent_size), zpool_get_name(zhp), error); } } } zpool_close(zhp); return (1); } zpool_close(zhp); return (0); } /* * This function handles the ESC_DEV_DLE device change event. Use the * provided vdev guid when looking up a disk or partition, when the guid * is not present assume the entire disk is owned by ZFS and append the * expected -part1 partition information then lookup by physical path. 
*/ static int zfs_deliver_dle(nvlist_t *nvl) { char *devname, name[MAXPATHLEN]; uint64_t guid; if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &guid) == 0) { sprintf(name, "%llu", (u_longlong_t)guid); } else if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) == 0) { strlcpy(name, devname, MAXPATHLEN); zfs_append_partition(name, MAXPATHLEN); } else { sprintf(name, "unknown"); zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath"); } if (zpool_iter(g_zfshdl, zfsdle_vdev_online, nvl) != 1) { zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not " "found", name); return (1); } return (0); } /* * syseventd daemon module event handler * * Handles syseventd daemon zfs device related events: * * EC_DEV_ADD.ESC_DISK * EC_DEV_STATUS.ESC_DEV_DLE * EC_ZFS.ESC_ZFS_VDEV_CHECK * * Note: assumes only one thread active at a time (not thread safe) */ static int zfs_slm_deliver_event(const char *class, const char *subclass, nvlist_t *nvl) { int ret; boolean_t is_lofi = B_FALSE, is_check = B_FALSE, is_dle = B_FALSE; if (strcmp(class, EC_DEV_ADD) == 0) { /* * We're mainly interested in disk additions, but we also listen * for new loop devices, to allow for simplified testing. */ if (strcmp(subclass, ESC_DISK) == 0) is_lofi = B_FALSE; else if (strcmp(subclass, ESC_LOFI) == 0) is_lofi = B_TRUE; else return (0); is_check = B_FALSE; } else if (strcmp(class, EC_ZFS) == 0 && strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) { /* * This event signifies that a device failed to open * during pool load, but the 'autoreplace' property was * set, so we should pretend it's just been added. 
*/ is_check = B_TRUE; } else if (strcmp(class, EC_DEV_STATUS) == 0 && strcmp(subclass, ESC_DEV_DLE) == 0) { is_dle = B_TRUE; } else { return (0); } if (is_dle) ret = zfs_deliver_dle(nvl); else if (is_check) ret = zfs_deliver_check(nvl); else ret = zfs_deliver_add(nvl, is_lofi); return (ret); } /*ARGSUSED*/ static void * zfs_enum_pools(void *arg) { (void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list); /* * Linux - instead of using a thread pool, each list entry * will spawn a thread when an unavailable pool transitions * to available. zfs_slm_fini will wait for these threads. */ g_enumeration_done = B_TRUE; return (NULL); } /* * called from zed daemon at startup * * sent messages from zevents or udev monitor * * For now, each agent has its own libzfs instance */ int zfs_slm_init(void) { if ((g_zfshdl = libzfs_init()) == NULL) return (-1); /* * collect a list of unavailable pools (asynchronously, * since this can take a while) */ list_create(&g_pool_list, sizeof (struct unavailpool), offsetof(struct unavailpool, uap_node)); if (pthread_create(&g_zfs_tid, NULL, zfs_enum_pools, NULL) != 0) { list_destroy(&g_pool_list); libzfs_fini(g_zfshdl); return (-1); } pthread_setname_np(g_zfs_tid, "enum-pools"); list_create(&g_device_list, sizeof (struct pendingdev), offsetof(struct pendingdev, pd_node)); return (0); } void zfs_slm_fini(void) { unavailpool_t *pool; pendingdev_t *device; /* wait for zfs_enum_pools thread to complete */ (void) pthread_join(g_zfs_tid, NULL); /* destroy the thread pool */ if (g_tpool != NULL) { tpool_wait(g_tpool); tpool_destroy(g_tpool); } while ((pool = (list_head(&g_pool_list))) != NULL) { list_remove(&g_pool_list, pool); zpool_close(pool->uap_zhp); free(pool); } list_destroy(&g_pool_list); while ((device = (list_head(&g_device_list))) != NULL) { list_remove(&g_device_list, device); free(device); } list_destroy(&g_device_list); libzfs_fini(g_zfshdl); } void zfs_slm_event(const char *class, const char *subclass, nvlist_t *nvl) { 
zed_log_msg(LOG_INFO, "zfs_slm_event: %s.%s", class, subclass); (void) zfs_slm_deliver_event(class, subclass, nvl); } diff --git a/cmd/zpool/zpool_iter.c b/cmd/zpool/zpool_iter.c index abfa2b7f6b90..486b8a1ac2b5 100644 --- a/cmd/zpool/zpool_iter.c +++ b/cmd/zpool/zpool_iter.c @@ -1,724 +1,704 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2016 Igor Kozhukhov . */ #include #include #include #include #include #include #include #include #include #include #include #include "zpool_util.h" /* * Private interface for iterating over pools specified on the command line. * Most consumers will call for_each_pool, but in order to support iostat, we * allow fined grained control through the zpool_list_t interface. 
*/ typedef struct zpool_node { zpool_handle_t *zn_handle; uu_avl_node_t zn_avlnode; int zn_mark; } zpool_node_t; struct zpool_list { boolean_t zl_findall; boolean_t zl_literal; uu_avl_t *zl_avl; uu_avl_pool_t *zl_pool; zprop_list_t **zl_proplist; }; /* ARGSUSED */ static int zpool_compare(const void *larg, const void *rarg, void *unused) { zpool_handle_t *l = ((zpool_node_t *)larg)->zn_handle; zpool_handle_t *r = ((zpool_node_t *)rarg)->zn_handle; const char *lname = zpool_get_name(l); const char *rname = zpool_get_name(r); return (strcmp(lname, rname)); } /* * Callback function for pool_list_get(). Adds the given pool to the AVL tree * of known pools. */ static int add_pool(zpool_handle_t *zhp, void *data) { zpool_list_t *zlp = data; zpool_node_t *node = safe_malloc(sizeof (zpool_node_t)); uu_avl_index_t idx; node->zn_handle = zhp; uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool); if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) { if (zlp->zl_proplist && zpool_expand_proplist(zhp, zlp->zl_proplist, zlp->zl_literal) != 0) { zpool_close(zhp); free(node); return (-1); } uu_avl_insert(zlp->zl_avl, node, idx); } else { zpool_close(zhp); free(node); return (-1); } return (0); } /* * Create a list of pools based on the given arguments. If we're given no * arguments, then iterate over all pools in the system and add them to the AVL * tree. Otherwise, add only those pool explicitly specified on the command * line. 
*/ zpool_list_t * pool_list_get(int argc, char **argv, zprop_list_t **proplist, boolean_t literal, int *err) { zpool_list_t *zlp; zlp = safe_malloc(sizeof (zpool_list_t)); zlp->zl_pool = uu_avl_pool_create("zfs_pool", sizeof (zpool_node_t), offsetof(zpool_node_t, zn_avlnode), zpool_compare, UU_DEFAULT); if (zlp->zl_pool == NULL) zpool_no_memory(); if ((zlp->zl_avl = uu_avl_create(zlp->zl_pool, NULL, UU_DEFAULT)) == NULL) zpool_no_memory(); zlp->zl_proplist = proplist; zlp->zl_literal = literal; if (argc == 0) { (void) zpool_iter(g_zfs, add_pool, zlp); zlp->zl_findall = B_TRUE; } else { int i; for (i = 0; i < argc; i++) { zpool_handle_t *zhp; if ((zhp = zpool_open_canfail(g_zfs, argv[i])) != NULL) { if (add_pool(zhp, zlp) != 0) *err = B_TRUE; } else { *err = B_TRUE; } } } return (zlp); } /* * Search for any new pools, adding them to the list. We only add pools when no * options were given on the command line. Otherwise, we keep the list fixed as * those that were explicitly specified. */ void pool_list_update(zpool_list_t *zlp) { if (zlp->zl_findall) (void) zpool_iter(g_zfs, add_pool, zlp); } /* * Iterate over all pools in the list, executing the callback for each */ int pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func, void *data) { zpool_node_t *node, *next_node; int ret = 0; for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next_node) { next_node = uu_avl_next(zlp->zl_avl, node); if (zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL || unavail) ret |= func(node->zn_handle, data); } return (ret); } /* * Remove the given pool from the list. When running iostat, we want to remove * those pools that no longer exist. */ void pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp) { zpool_node_t search, *node; search.zn_handle = zhp; if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) { uu_avl_remove(zlp->zl_avl, node); zpool_close(node->zn_handle); free(node); } } /* * Free all the handles associated with this list. 
*/ void pool_list_free(zpool_list_t *zlp) { uu_avl_walk_t *walk; zpool_node_t *node; if ((walk = uu_avl_walk_start(zlp->zl_avl, UU_WALK_ROBUST)) == NULL) { (void) fprintf(stderr, gettext("internal error: out of memory")); exit(1); } while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(zlp->zl_avl, node); zpool_close(node->zn_handle); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(zlp->zl_avl); uu_avl_pool_destroy(zlp->zl_pool); free(zlp); } /* * Returns the number of elements in the pool list. */ int pool_list_count(zpool_list_t *zlp) { return (uu_avl_numnodes(zlp->zl_avl)); } /* * High level function which iterates over all pools given on the command line, * using the pool_list_* interfaces. */ int for_each_pool(int argc, char **argv, boolean_t unavail, zprop_list_t **proplist, boolean_t literal, zpool_iter_f func, void *data) { zpool_list_t *list; int ret = 0; if ((list = pool_list_get(argc, argv, proplist, literal, &ret)) == NULL) return (1); if (pool_list_iter(list, unavail, func, data) != 0) ret = 1; pool_list_free(list); return (ret); } /* * This is the equivalent of for_each_pool() for vdevs. It iterates thorough * all vdevs in the pool, ignoring root vdevs and holes, calling func() on * each one. * * @zhp: Zpool handle * @func: Function to call on each vdev * @data: Custom data to pass to the function */ int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data) { nvlist_t *config, *nvroot = NULL; if ((config = zpool_get_config(zhp, NULL)) != NULL) { verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); } return (for_each_vdev_cb((void *) zhp, nvroot, func, data)); } /* * Process the vcdl->vdev_cmd_data[] array to figure out all the unique column * names and their widths. When this function is done, vcdl->uniq_cols, * vcdl->uniq_cols_cnt, and vcdl->uniq_cols_width will be filled in. 
*/
static void
process_unique_cmd_columns(vdev_cmd_data_list_t *vcdl)
{
	char **names = NULL;
	int *widths;
	int nnames = 0;

	/*
	 * Pass 1: collect every distinct column name in first-seen order.
	 * The array stores the vdev's own string pointers; nothing is copied.
	 */
	for (int v = 0; v < vcdl->count; v++) {
		vdev_cmd_data_t *vd = &vcdl->data[v];

		for (int c = 0; c < vd->cols_cnt; c++) {
			char **grown;
			int hit = 0;

			/* Is this column in our list of unique column names? */
			while (hit < nnames &&
			    strcmp(vd->cols[c], names[hit]) != 0)
				hit++;
			if (hit < nnames)
				continue;	/* yes it is */

			/* No entry for column, add to list */
			grown = realloc(names,
			    sizeof (*names) * (nnames + 1));
			if (grown == NULL)
				break;	/* Nothing we can do... */
			names = grown;
			names[nnames] = vd->cols[c];
			nnames++;
		}
	}

	/*
	 * Pass 2: we now have a list of all the unique column names.  Figure
	 * out the max width of each column by looking at the column name and
	 * all its values.
	 */
	widths = safe_malloc(sizeof (*widths) * nnames);
	for (int n = 0; n < nnames; n++) {
		/* Start off with the column title's width */
		widths[n] = strlen(names[n]);

		for (int v = 0; v < vcdl->count; v++) {
			vdev_cmd_data_t *vd = &vcdl->data[v];

			for (int c = 0; c < vd->cols_cnt; c++) {
				size_t value_len;

				/* Does this vdev have a value for this col? */
				if (strcmp(vd->cols[c], names[n]) != 0)
					continue;

				/* Is the value width larger? */
				value_len = strlen(vd->lines[c]);
				if ((size_t)widths[n] < value_len)
					widths[n] = value_len;
			}
		}
	}

	vcdl->uniq_cols = names;
	vcdl->uniq_cols_cnt = nnames;
	vcdl->uniq_cols_width = widths;
}

/*
 * Process a line of command output
 *
 * When running 'zpool iostat|status -c' the lines of output can either be
 * in the form of:
 *
 *	column_name=value
 *
 * Or just:
 *
 *	value
 *
 * Process the column_name (if any) and value.
 *
 * Returns 0 if line was processed, and there are more lines can still be
 * processed.
 *
 * Returns 1 if this was the last line to process, or error.
*/ static int vdev_process_cmd_output(vdev_cmd_data_t *data, char *line) { char *col = NULL; char *val = line; char *equals; char **tmp; if (line == NULL) return (1); equals = strchr(line, '='); if (equals != NULL) { /* * We have a 'column=value' type line. Split it into the * column and value strings by turning the '=' into a '\0'. */ *equals = '\0'; col = line; val = equals + 1; } else { val = line; } /* Do we already have a column by this name? If so, skip it. */ if (col != NULL) { for (int i = 0; i < data->cols_cnt; i++) { if (strcmp(col, data->cols[i]) == 0) return (0); /* Duplicate, skip */ } } if (val != NULL) { tmp = realloc(data->lines, (data->lines_cnt + 1) * sizeof (*data->lines)); if (tmp == NULL) return (1); data->lines = tmp; data->lines[data->lines_cnt] = strdup(val); data->lines_cnt++; } if (col != NULL) { tmp = realloc(data->cols, (data->cols_cnt + 1) * sizeof (*data->cols)); if (tmp == NULL) return (1); data->cols = tmp; data->cols[data->cols_cnt] = strdup(col); data->cols_cnt++; } if (val != NULL && col == NULL) return (1); return (0); } /* * Run the cmd and store results in *data. */ static void vdev_run_cmd(vdev_cmd_data_t *data, char *cmd) { int rc; - char *argv[2] = {cmd, 0}; - char *env[5] = {"PATH=/bin:/sbin:/usr/bin:/usr/sbin", NULL, NULL, NULL, - NULL}; + char *argv[2] = {cmd}; + char **env; char **lines = NULL; int lines_cnt = 0; int i; - /* Setup our custom environment variables */ - rc = asprintf(&env[1], "VDEV_PATH=%s", - data->path ? data->path : ""); - if (rc == -1) { - env[1] = NULL; + env = zpool_vdev_script_alloc_env(data->pool, data->path, data->upath, + data->vdev_enc_sysfs_path, NULL, NULL); + if (env == NULL) goto out; - } - - rc = asprintf(&env[2], "VDEV_UPATH=%s", - data->upath ? data->upath : ""); - if (rc == -1) { - env[2] = NULL; - goto out; - } - - rc = asprintf(&env[3], "VDEV_ENC_SYSFS_PATH=%s", - data->vdev_enc_sysfs_path ? 
- data->vdev_enc_sysfs_path : ""); - if (rc == -1) { - env[3] = NULL; - goto out; - } /* Run the command */ rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines, &lines_cnt); + + zpool_vdev_script_free_env(env); + if (rc != 0) goto out; /* Process the output we got */ for (i = 0; i < lines_cnt; i++) if (vdev_process_cmd_output(data, lines[i]) != 0) break; out: if (lines != NULL) libzfs_free_str_array(lines, lines_cnt); - - /* Start with i = 1 since env[0] was statically allocated */ - for (i = 1; i < ARRAY_SIZE(env); i++) - free(env[i]); } /* * Generate the search path for zpool iostat/status -c scripts. * The string returned must be freed. */ char * zpool_get_cmd_search_path(void) { const char *env; char *sp = NULL; env = getenv("ZPOOL_SCRIPTS_PATH"); if (env != NULL) return (strdup(env)); env = getenv("HOME"); if (env != NULL) { if (asprintf(&sp, "%s/.zpool.d:%s", env, ZPOOL_SCRIPTS_DIR) != -1) { return (sp); } } if (asprintf(&sp, "%s", ZPOOL_SCRIPTS_DIR) != -1) return (sp); return (NULL); } /* Thread function run for each vdev */ static void vdev_run_cmd_thread(void *cb_cmd_data) { vdev_cmd_data_t *data = cb_cmd_data; char *cmd = NULL, *cmddup, *cmdrest; cmddup = strdup(data->cmd); if (cmddup == NULL) return; cmdrest = cmddup; while ((cmd = strtok_r(cmdrest, ",", &cmdrest))) { char *dir = NULL, *sp, *sprest; char fullpath[MAXPATHLEN]; if (strchr(cmd, '/') != NULL) continue; sp = zpool_get_cmd_search_path(); if (sp == NULL) continue; sprest = sp; while ((dir = strtok_r(sprest, ":", &sprest))) { if (snprintf(fullpath, sizeof (fullpath), "%s/%s", dir, cmd) == -1) continue; if (access(fullpath, X_OK) == 0) { vdev_run_cmd(data, fullpath); break; } } free(sp); } free(cmddup); } /* For each vdev in the pool run a command */ static int for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl) { vdev_cmd_data_list_t *vcdl = cb_vcdl; vdev_cmd_data_t *data; char *path = NULL; char *vname = NULL; char *vdev_enc_sysfs_path = NULL; int i, match = 0; 
zpool_handle_t *zhp = zhp_data; if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) return (1); nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &vdev_enc_sysfs_path); /* Spares show more than once if they're in use, so skip if exists */ for (i = 0; i < vcdl->count; i++) { if ((strcmp(vcdl->data[i].path, path) == 0) && (strcmp(vcdl->data[i].pool, zpool_get_name(zhp)) == 0)) { /* vdev already exists, skip it */ return (0); } } /* Check for selected vdevs here, if any */ for (i = 0; i < vcdl->vdev_names_count; i++) { vname = zpool_vdev_name(g_zfs, zhp, nv, vcdl->cb_name_flags); if (strcmp(vcdl->vdev_names[i], vname) == 0) { free(vname); match = 1; break; /* match */ } free(vname); } /* If we selected vdevs, and this isn't one of them, then bail out */ if (!match && vcdl->vdev_names_count) return (0); /* * Resize our array and add in the new element. */ if (!(vcdl->data = realloc(vcdl->data, sizeof (*vcdl->data) * (vcdl->count + 1)))) return (ENOMEM); /* couldn't realloc */ data = &vcdl->data[vcdl->count]; data->pool = strdup(zpool_get_name(zhp)); data->path = strdup(path); data->upath = zfs_get_underlying_path(path); data->cmd = vcdl->cmd; data->lines = data->cols = NULL; data->lines_cnt = data->cols_cnt = 0; if (vdev_enc_sysfs_path) data->vdev_enc_sysfs_path = strdup(vdev_enc_sysfs_path); else data->vdev_enc_sysfs_path = NULL; vcdl->count++; return (0); } /* Get the names and count of the vdevs */ static int all_pools_for_each_vdev_gather_cb(zpool_handle_t *zhp, void *cb_vcdl) { return (for_each_vdev(zhp, for_each_vdev_run_cb, cb_vcdl)); } /* * Now that vcdl is populated with our complete list of vdevs, spawn * off the commands. 
*/ static void all_pools_for_each_vdev_run_vcdl(vdev_cmd_data_list_t *vcdl) { tpool_t *t; t = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); if (t == NULL) return; /* Spawn off the command for each vdev */ for (int i = 0; i < vcdl->count; i++) { (void) tpool_dispatch(t, vdev_run_cmd_thread, (void *) &vcdl->data[i]); } /* Wait for threads to finish */ tpool_wait(t); tpool_destroy(t); } /* * Run command 'cmd' on all vdevs in all pools in argv. Saves the first line of * output from the command in vcdk->data[].line for all vdevs. If you want * to run the command on only certain vdevs, fill in g_zfs, vdev_names, * vdev_names_count, and cb_name_flags. Otherwise leave them as zero. * * Returns a vdev_cmd_data_list_t that must be freed with * free_vdev_cmd_data_list(); */ vdev_cmd_data_list_t * all_pools_for_each_vdev_run(int argc, char **argv, char *cmd, libzfs_handle_t *g_zfs, char **vdev_names, int vdev_names_count, int cb_name_flags) { vdev_cmd_data_list_t *vcdl; vcdl = safe_malloc(sizeof (vdev_cmd_data_list_t)); vcdl->cmd = cmd; vcdl->vdev_names = vdev_names; vcdl->vdev_names_count = vdev_names_count; vcdl->cb_name_flags = cb_name_flags; vcdl->g_zfs = g_zfs; /* Gather our list of all vdevs in all pools */ for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, all_pools_for_each_vdev_gather_cb, vcdl); /* Run command on all vdevs in all pools */ all_pools_for_each_vdev_run_vcdl(vcdl); /* * vcdl->data[] now contains all the column names and values for each * vdev. We need to process that into a master list of unique column * names, and figure out the width of each column. 
*/ process_unique_cmd_columns(vcdl); return (vcdl); } /* * Free the vdev_cmd_data_list_t created by all_pools_for_each_vdev_run() */ void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl) { free(vcdl->uniq_cols); free(vcdl->uniq_cols_width); for (int i = 0; i < vcdl->count; i++) { free(vcdl->data[i].path); free(vcdl->data[i].pool); free(vcdl->data[i].upath); for (int j = 0; j < vcdl->data[i].lines_cnt; j++) free(vcdl->data[i].lines[j]); free(vcdl->data[i].lines); for (int j = 0; j < vcdl->data[i].cols_cnt; j++) free(vcdl->data[i].cols[j]); free(vcdl->data[i].cols); free(vcdl->data[i].vdev_enc_sysfs_path); } free(vcdl->data); free(vcdl); } diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index da75866f5145..9e9c9c525331 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -1,138 +1,142 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #ifndef ZPOOL_UTIL_H #define ZPOOL_UTIL_H #include #include #include #ifdef __cplusplus extern "C" { #endif /* Path to scripts you can run with "zpool status/iostat -c" */ #define ZPOOL_SCRIPTS_DIR SYSCONFDIR"/zfs/zpool.d" /* * Basic utility functions */ void *safe_malloc(size_t); void zpool_no_memory(void); uint_t num_logs(nvlist_t *nv); uint64_t array64_max(uint64_t array[], unsigned int len); int highbit64(uint64_t i); int lowbit64(uint64_t i); /* * Misc utility functions */ char *zpool_get_cmd_search_path(void); /* * Virtual device functions */ nvlist_t *make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, boolean_t replacing, boolean_t dryrun, int argc, char **argv); nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, splitflags_t flags, int argc, char **argv); /* * Pool list functions */ int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **, boolean_t, zpool_iter_f, void *); /* Vdev list functions */ int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data); typedef struct zpool_list zpool_list_t; zpool_list_t *pool_list_get(int, char **, zprop_list_t **, boolean_t, int *); void pool_list_update(zpool_list_t *); int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *); void pool_list_free(zpool_list_t *); int pool_list_count(zpool_list_t *); void pool_list_remove(zpool_list_t *, zpool_handle_t *); extern libzfs_handle_t *g_zfs; typedef struct vdev_cmd_data { char **lines; /* Array of lines of output, minus the column name */ int lines_cnt; /* Number of lines in the array */ char **cols; /* Array of column names */ int cols_cnt; /* Number of column names */ char *path; /* vdev path */ char *upath; /* vdev underlying path */ char *pool; /* Pool name */ char *cmd; /* backpointer to cmd */ char *vdev_enc_sysfs_path; /* enclosure sysfs path (if any) */ } vdev_cmd_data_t; typedef struct vdev_cmd_data_list { char *cmd; /* Command to run */ unsigned int count; /* 
Number of vdev_cmd_data items (vdevs) */ /* fields used to select only certain vdevs, if requested */ libzfs_handle_t *g_zfs; char **vdev_names; int vdev_names_count; int cb_name_flags; vdev_cmd_data_t *data; /* Array of vdevs */ /* List of unique column names and widths */ char **uniq_cols; int uniq_cols_cnt; int *uniq_cols_width; } vdev_cmd_data_list_t; vdev_cmd_data_list_t *all_pools_for_each_vdev_run(int argc, char **argv, char *cmd, libzfs_handle_t *g_zfs, char **vdev_names, int vdev_names_count, int cb_name_flags); void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl); +void free_vdev_cmd_data(vdev_cmd_data_t *data); + +int vdev_run_cmd_simple(char *path, char *cmd); + int check_device(const char *path, boolean_t force, boolean_t isspare, boolean_t iswholedisk); boolean_t check_sector_size_database(char *path, int *sector_size); void vdev_error(const char *fmt, ...); int check_file(const char *file, boolean_t force, boolean_t isspare); void after_zpool_upgrade(zpool_handle_t *zhp); #ifdef __cplusplus } #endif #endif /* ZPOOL_UTIL_H */ diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 3d83da641ecb..84b38cf9b1bc 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -1,1870 +1,1899 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2018 by Delphix. All rights reserved. * Copyright (c) 2016, 2017 Intel Corporation. * Copyright 2016 Igor Kozhukhov . */ /* * Functions to convert between a list of vdevs and an nvlist representing the * configuration. Each entry in the list can be one of: * * Device vdevs * disk=(path=..., devid=...) * file=(path=...) * * Group vdevs * raidz[1|2]=(...) * mirror=(...) * * Hot spares * * While the underlying implementation supports it, group vdevs cannot contain * other group vdevs. All userland verification of devices is contained within * this file. If successful, the nvlist returned can be passed directly to the * kernel; we've done as much verification as possible in userland. * * Hot spares are a special case, and passed down as an array of disk vdevs, at * the same level as the root of the vdev tree. * * The only function exported by this file is 'make_root_vdev'. The * function performs several passes: * * 1. Construct the vdev specification. Performs syntax validation and * makes sure each device is valid. * 2. Check for devices in use. Using libblkid to make sure that no * devices are also in use. Some can be overridden using the 'force' * flag, others cannot. * 3. Check for replication errors if the 'force' flag is not specified. * validates that the replication level is consistent across the * entire pool. * 4. Call libzfs to label any whole disks with an EFI label. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "zpool_util.h" #include #include /* * For any given vdev specification, we can have multiple errors. 
The * vdev_error() function keeps track of whether we have seen an error yet, and * prints out a header if its the first error we've seen. */ boolean_t error_seen; boolean_t is_force; /*PRINTFLIKE1*/ void vdev_error(const char *fmt, ...) { va_list ap; if (!error_seen) { (void) fprintf(stderr, gettext("invalid vdev specification\n")); if (!is_force) (void) fprintf(stderr, gettext("use '-f' to override " "the following errors:\n")); else (void) fprintf(stderr, gettext("the following errors " "must be manually repaired:\n")); error_seen = B_TRUE; } va_start(ap, fmt); (void) vfprintf(stderr, fmt, ap); va_end(ap); } /* * Check that a file is valid. All we can do in this case is check that it's * not in use by another pool, and not in use by swap. */ int check_file(const char *file, boolean_t force, boolean_t isspare) { char *name; int fd; int ret = 0; pool_state_t state; boolean_t inuse; if ((fd = open(file, O_RDONLY)) < 0) return (0); if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { const char *desc; switch (state) { case POOL_STATE_ACTIVE: desc = gettext("active"); break; case POOL_STATE_EXPORTED: desc = gettext("exported"); break; case POOL_STATE_POTENTIALLY_ACTIVE: desc = gettext("potentially active"); break; default: desc = gettext("unknown"); break; } /* * Allow hot spares to be shared between pools. */ if (state == POOL_STATE_SPARE && isspare) { free(name); (void) close(fd); return (0); } if (state == POOL_STATE_ACTIVE || state == POOL_STATE_SPARE || !force) { switch (state) { case POOL_STATE_SPARE: vdev_error(gettext("%s is reserved as a hot " "spare for pool %s\n"), file, name); break; default: vdev_error(gettext("%s is part of %s pool " "'%s'\n"), file, desc, name); break; } ret = -1; } free(name); } (void) close(fd); return (ret); } /* * This may be a shorthand device path or it could be total gibberish. * Check to see if it is a known device available in zfs_vdev_paths. 
* As part of this check, see if we've been given an entire disk * (minus the slice number). */ static int is_shorthand_path(const char *arg, char *path, size_t path_size, struct stat64 *statbuf, boolean_t *wholedisk) { int error; error = zfs_resolve_shortname(arg, path, path_size); if (error == 0) { *wholedisk = zfs_dev_is_whole_disk(path); if (*wholedisk || (stat64(path, statbuf) == 0)) return (0); } strlcpy(path, arg, path_size); memset(statbuf, 0, sizeof (*statbuf)); *wholedisk = B_FALSE; return (error); } /* * Determine if the given path is a hot spare within the given configuration. * If no configuration is given we rely solely on the label. */ static boolean_t is_spare(nvlist_t *config, const char *path) { int fd; pool_state_t state; char *name = NULL; nvlist_t *label; uint64_t guid, spareguid; nvlist_t *nvroot; nvlist_t **spares; uint_t i, nspares; boolean_t inuse; if (zpool_is_draid_spare(path)) return (B_TRUE); if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0) return (B_FALSE); if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || !inuse || state != POOL_STATE_SPARE || zpool_read_label(fd, &label, NULL) != 0) { free(name); (void) close(fd); return (B_FALSE); } free(name); (void) close(fd); if (config == NULL) { nvlist_free(label); return (B_TRUE); } verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); nvlist_free(label); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { for (i = 0; i < nspares; i++) { verify(nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID, &spareguid) == 0); if (spareguid == guid) return (B_TRUE); } } return (B_FALSE); } /* * Create a leaf vdev. Determine if this is a file or a device. If it's a * device, fill in the device id to make a complete nvlist. 
Valid forms for a * leaf vdev are: * * /dev/xxx Complete disk path * /xxx Full path to file * xxx Shorthand for /xxx * draid* Virtual dRAID spare */ static nvlist_t * make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) { char path[MAXPATHLEN]; struct stat64 statbuf; nvlist_t *vdev = NULL; char *type = NULL; boolean_t wholedisk = B_FALSE; uint64_t ashift = 0; int err; /* * Determine what type of vdev this is, and put the full path into * 'path'. We detect whether this is a device of file afterwards by * checking the st_mode of the file. */ if (arg[0] == '/') { /* * Complete device or file path. Exact type is determined by * examining the file descriptor afterwards. Symbolic links * are resolved to their real paths to determine whole disk * and S_ISBLK/S_ISREG type checks. However, we are careful * to store the given path as ZPOOL_CONFIG_PATH to ensure we * can leverage udev's persistent device labels. */ if (realpath(arg, path) == NULL) { (void) fprintf(stderr, gettext("cannot resolve path '%s'\n"), arg); return (NULL); } wholedisk = zfs_dev_is_whole_disk(path); if (!wholedisk && (stat64(path, &statbuf) != 0)) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), path, strerror(errno)); return (NULL); } /* After whole disk check restore original passed path */ strlcpy(path, arg, sizeof (path)); } else if (zpool_is_draid_spare(arg)) { if (!is_primary) { (void) fprintf(stderr, gettext("cannot open '%s': dRAID spares can only " "be used to replace primary vdevs\n"), arg); return (NULL); } wholedisk = B_TRUE; strlcpy(path, arg, sizeof (path)); type = VDEV_TYPE_DRAID_SPARE; } else { err = is_shorthand_path(arg, path, sizeof (path), &statbuf, &wholedisk); if (err != 0) { /* * If we got ENOENT, then the user gave us * gibberish, so try to direct them with a * reasonable error message. Otherwise, * regurgitate strerror() since it's the best we * can do. 
*/ if (err == ENOENT) { (void) fprintf(stderr, gettext("cannot open '%s': no such " "device in %s\n"), arg, DISK_ROOT); (void) fprintf(stderr, gettext("must be a full path or " "shorthand device name\n")); return (NULL); } else { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), path, strerror(errno)); return (NULL); } } } if (type == NULL) { /* * Determine whether this is a device or a file. */ if (wholedisk || S_ISBLK(statbuf.st_mode)) { type = VDEV_TYPE_DISK; } else if (S_ISREG(statbuf.st_mode)) { type = VDEV_TYPE_FILE; } else { fprintf(stderr, gettext("cannot use '%s': must " "be a block device or regular file\n"), path); return (NULL); } } /* * Finally, we have the complete device or file, and we know that it is * acceptable to use. Construct the nvlist to describe this vdev. All * vdevs have a 'path' element, and devices also have a 'devid' element. */ verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0); verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); if (strcmp(type, VDEV_TYPE_DISK) == 0) verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, (uint64_t)wholedisk) == 0); /* * Override defaults if custom properties are provided. */ if (props != NULL) { char *value = NULL; if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { (void) fprintf(stderr, gettext("ashift must be a number.\n")); return (NULL); } if (ashift != 0 && (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { (void) fprintf(stderr, gettext("invalid 'ashift=%" PRIu64 "' " "property: only values between %" PRId32 " " "and %" PRId32 " are allowed.\n"), ashift, ASHIFT_MIN, ASHIFT_MAX); return (NULL); } } } /* * If the device is known to incorrectly report its physical sector * size explicitly provide the known correct value. 
*/ if (ashift == 0) { int sector_size; if (check_sector_size_database(path, §or_size) == B_TRUE) ashift = highbit64(sector_size) - 1; } if (ashift > 0) (void) nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT, ashift); return (vdev); } /* * Go through and verify the replication level of the pool is consistent. * Performs the following checks: * * For the new spec, verifies that devices in mirrors and raidz are the * same size. * * If the current configuration already has inconsistent replication * levels, ignore any other potential problems in the new spec. * * Otherwise, make sure that the current spec (if there is one) and the new * spec have consistent replication levels. * * If there is no current spec (create), make sure new spec has at least * one general purpose vdev. */ typedef struct replication_level { char *zprl_type; uint64_t zprl_children; uint64_t zprl_parity; } replication_level_t; #define ZPOOL_FUZZ (16 * 1024 * 1024) /* * N.B. For the purposes of comparing replication levels dRAID can be * considered functionally equivalent to raidz. */ static boolean_t is_raidz_mirror(replication_level_t *a, replication_level_t *b, replication_level_t **raidz, replication_level_t **mirror) { if ((strcmp(a->zprl_type, "raidz") == 0 || strcmp(a->zprl_type, "draid") == 0) && strcmp(b->zprl_type, "mirror") == 0) { *raidz = a; *mirror = b; return (B_TRUE); } return (B_FALSE); } /* * Comparison for determining if dRAID and raidz where passed in either order. */ static boolean_t is_raidz_draid(replication_level_t *a, replication_level_t *b) { if ((strcmp(a->zprl_type, "raidz") == 0 || strcmp(a->zprl_type, "draid") == 0) && (strcmp(b->zprl_type, "raidz") == 0 || strcmp(b->zprl_type, "draid") == 0)) { return (B_TRUE); } return (B_FALSE); } /* * Given a list of toplevel vdevs, return the current replication level. If * the config is inconsistent, then NULL is returned. If 'fatal' is set, then * an error message will be displayed for each self-inconsistent vdev. 
*/ static replication_level_t * get_replication(nvlist_t *nvroot, boolean_t fatal) { nvlist_t **top; uint_t t, toplevels; nvlist_t **child; uint_t c, children; nvlist_t *nv; char *type; replication_level_t lastrep = {0}; replication_level_t rep; replication_level_t *ret; replication_level_t *raidz, *mirror; boolean_t dontreport; ret = safe_malloc(sizeof (replication_level_t)); verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &top, &toplevels) == 0); for (t = 0; t < toplevels; t++) { uint64_t is_log = B_FALSE; nv = top[t]; /* * For separate logs we ignore the top level vdev replication * constraints. */ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); if (is_log) continue; /* Ignore holes introduced by removing aux devices */ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_HOLE) == 0) continue; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { /* * This is a 'file' or 'disk' vdev. */ rep.zprl_type = type; rep.zprl_children = 1; rep.zprl_parity = 0; } else { int64_t vdev_size; /* * This is a mirror or RAID-Z vdev. Go through and make * sure the contents are all the same (files vs. disks), * keeping track of the number of elements in the * process. * * We also check that the size of each vdev (if it can * be determined) is the same. */ rep.zprl_type = type; rep.zprl_children = 0; if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 || strcmp(type, VDEV_TYPE_DRAID) == 0) { verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &rep.zprl_parity) == 0); assert(rep.zprl_parity != 0); } else { rep.zprl_parity = 0; } /* * The 'dontreport' variable indicates that we've * already reported an error for this spec, so don't * bother doing it again. 
*/ type = NULL; dontreport = 0; vdev_size = -1LL; for (c = 0; c < children; c++) { nvlist_t *cnv = child[c]; char *path; struct stat64 statbuf; int64_t size = -1LL; char *childtype; int fd, err; rep.zprl_children++; verify(nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &childtype) == 0); /* * If this is a replacing or spare vdev, then * get the real first child of the vdev: do this * in a loop because replacing and spare vdevs * can be nested. */ while (strcmp(childtype, VDEV_TYPE_REPLACING) == 0 || strcmp(childtype, VDEV_TYPE_SPARE) == 0) { nvlist_t **rchild; uint_t rchildren; verify(nvlist_lookup_nvlist_array(cnv, ZPOOL_CONFIG_CHILDREN, &rchild, &rchildren) == 0); assert(rchildren == 2); cnv = rchild[0]; verify(nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &childtype) == 0); } verify(nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &path) == 0); /* * If we have a raidz/mirror that combines disks * with files, report it as an error. */ if (!dontreport && type != NULL && strcmp(type, childtype) != 0) { if (ret != NULL) free(ret); ret = NULL; if (fatal) vdev_error(gettext( "mismatched replication " "level: %s contains both " "files and devices\n"), rep.zprl_type); else return (NULL); dontreport = B_TRUE; } /* * According to stat(2), the value of 'st_size' * is undefined for block devices and character * devices. But there is no effective way to * determine the real size in userland. * * Instead, we'll take advantage of an * implementation detail of spec_size(). If the * device is currently open, then we (should) * return a valid size. * * If we still don't get a valid size (indicated * by a size of 0 or MAXOFFSET_T), then ignore * this device altogether. */ if ((fd = open(path, O_RDONLY)) >= 0) { err = fstat64_blk(fd, &statbuf); (void) close(fd); } else { err = stat64(path, &statbuf); } if (err != 0 || statbuf.st_size == 0 || statbuf.st_size == MAXOFFSET_T) continue; size = statbuf.st_size; /* * Also make sure that devices and * slices have a consistent size. 
If * they differ by a significant amount * (~16MB) then report an error. */ if (!dontreport && (vdev_size != -1LL && (llabs(size - vdev_size) > ZPOOL_FUZZ))) { if (ret != NULL) free(ret); ret = NULL; if (fatal) vdev_error(gettext( "%s contains devices of " "different sizes\n"), rep.zprl_type); else return (NULL); dontreport = B_TRUE; } type = childtype; vdev_size = size; } } /* * At this point, we have the replication of the last toplevel * vdev in 'rep'. Compare it to 'lastrep' to see if it is * different. */ if (lastrep.zprl_type != NULL) { if (is_raidz_mirror(&lastrep, &rep, &raidz, &mirror) || is_raidz_mirror(&rep, &lastrep, &raidz, &mirror)) { /* * Accepted raidz and mirror when they can * handle the same number of disk failures. */ if (raidz->zprl_parity != mirror->zprl_children - 1) { if (ret != NULL) free(ret); ret = NULL; if (fatal) vdev_error(gettext( "mismatched replication " "level: " "%s and %s vdevs with " "different redundancy, " "%llu vs. %llu (%llu-way) " "are present\n"), raidz->zprl_type, mirror->zprl_type, raidz->zprl_parity, mirror->zprl_children - 1, mirror->zprl_children); else return (NULL); } } else if (is_raidz_draid(&lastrep, &rep)) { /* * Accepted raidz and draid when they can * handle the same number of disk failures. */ if (lastrep.zprl_parity != rep.zprl_parity) { if (ret != NULL) free(ret); ret = NULL; if (fatal) vdev_error(gettext( "mismatched replication " "level: %s and %s vdevs " "with different " "redundancy, %llu vs. 
" "%llu are present\n"), lastrep.zprl_type, rep.zprl_type, lastrep.zprl_parity, rep.zprl_parity); else return (NULL); } } else if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) { if (ret != NULL) free(ret); ret = NULL; if (fatal) vdev_error(gettext( "mismatched replication level: " "both %s and %s vdevs are " "present\n"), lastrep.zprl_type, rep.zprl_type); else return (NULL); } else if (lastrep.zprl_parity != rep.zprl_parity) { if (ret) free(ret); ret = NULL; if (fatal) vdev_error(gettext( "mismatched replication level: " "both %llu and %llu device parity " "%s vdevs are present\n"), lastrep.zprl_parity, rep.zprl_parity, rep.zprl_type); else return (NULL); } else if (lastrep.zprl_children != rep.zprl_children) { if (ret) free(ret); ret = NULL; if (fatal) vdev_error(gettext( "mismatched replication level: " "both %llu-way and %llu-way %s " "vdevs are present\n"), lastrep.zprl_children, rep.zprl_children, rep.zprl_type); else return (NULL); } } lastrep = rep; } if (ret != NULL) *ret = rep; return (ret); } /* * Check the replication level of the vdev spec against the current pool. Calls * get_replication() to make sure the new spec is self-consistent. If the pool * has a consistent replication level, then we ignore any errors. Otherwise, * report any difference between the two. */ static int check_replication(nvlist_t *config, nvlist_t *newroot) { nvlist_t **child; uint_t children; replication_level_t *current = NULL, *new; replication_level_t *raidz, *mirror; int ret; /* * If we have a current pool configuration, check to see if it's * self-consistent. If not, simply return success. 
*/ if (config != NULL) { nvlist_t *nvroot; verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if ((current = get_replication(nvroot, B_FALSE)) == NULL) return (0); } /* * for spares there may be no children, and therefore no * replication level to check */ if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) || (children == 0)) { free(current); return (0); } /* * If all we have is logs then there's no replication level to check. */ if (num_logs(newroot) == children) { free(current); return (0); } /* * Get the replication level of the new vdev spec, reporting any * inconsistencies found. */ if ((new = get_replication(newroot, B_TRUE)) == NULL) { free(current); return (-1); } /* * Check to see if the new vdev spec matches the replication level of * the current pool. */ ret = 0; if (current != NULL) { if (is_raidz_mirror(current, new, &raidz, &mirror) || is_raidz_mirror(new, current, &raidz, &mirror)) { if (raidz->zprl_parity != mirror->zprl_children - 1) { vdev_error(gettext( "mismatched replication level: pool and " "new vdev with different redundancy, %s " "and %s vdevs, %llu vs. 
%llu (%llu-way)\n"), raidz->zprl_type, mirror->zprl_type, raidz->zprl_parity, mirror->zprl_children - 1, mirror->zprl_children); ret = -1; } } else if (strcmp(current->zprl_type, new->zprl_type) != 0) { vdev_error(gettext( "mismatched replication level: pool uses %s " "and new vdev is %s\n"), current->zprl_type, new->zprl_type); ret = -1; } else if (current->zprl_parity != new->zprl_parity) { vdev_error(gettext( "mismatched replication level: pool uses %llu " "device parity and new vdev uses %llu\n"), current->zprl_parity, new->zprl_parity); ret = -1; } else if (current->zprl_children != new->zprl_children) { vdev_error(gettext( "mismatched replication level: pool uses %llu-way " "%s and new vdev uses %llu-way %s\n"), current->zprl_children, current->zprl_type, new->zprl_children, new->zprl_type); ret = -1; } } free(new); if (current != NULL) free(current); return (ret); } static int zero_label(char *path) { const int size = 4096; char buf[size]; int err, fd; if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), path, strerror(errno)); return (-1); } memset(buf, 0, size); err = write(fd, buf, size); (void) fdatasync(fd); (void) close(fd); if (err == -1) { (void) fprintf(stderr, gettext("cannot zero first %d bytes " "of '%s': %s\n"), size, path, strerror(errno)); return (-1); } if (err != size) { (void) fprintf(stderr, gettext("could only zero %d/%d bytes " "of '%s'\n"), err, size, path); return (-1); } return (0); } +static void +lines_to_stderr(char *lines[], int lines_cnt) +{ + int i; + for (i = 0; i < lines_cnt; i++) { + fprintf(stderr, "%s\n", lines[i]); + } +} + /* * Go through and find any whole disks in the vdev specification, labelling them * as appropriate. When constructing the vdev spec, we were unable to open this * device in order to provide a devid. Now that we have labelled the disk and * know that slice 0 is valid, we can construct the devid now. 
*
 * If the disk was already labeled with an EFI label, we will have gotten the
 * devid already (because we were able to open the whole disk).  Otherwise, we
 * need to get the devid after we label the disk.
 *
 * 'replacing' selects the operation name ("replace" rather than "add") that
 * is handed to zpool_prepare_and_label_disk() when 'zhp' is not NULL.
 */
static int
make_disks(zpool_handle_t *zhp, nvlist_t *nv, boolean_t replacing)
{
	nvlist_t **child;
	uint_t c, children;
	char *type, *path;
	char devpath[MAXPATHLEN];
	char udevpath[MAXPATHLEN];
	uint64_t wholedisk;
	struct stat64 statbuf;
	int is_exclusive = 0;
	int fd;
	int ret;

	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {

		if (strcmp(type, VDEV_TYPE_DISK) != 0)
			return (0);

		/*
		 * We have a disk device.  If this is a whole disk write
		 * out the efi partition table, otherwise write zero's to
		 * the first 4k of the partition.  This is to ensure that
		 * libblkid will not misidentify the partition due to a
		 * magic value left by the previous filesystem.
		 */
		verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
		verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk));

		if (!wholedisk) {
			/*
			 * Update device id string for mpath nodes (Linux only)
			 */
			if (is_mpath_whole_disk(path))
				update_vdev_config_dev_strs(nv);

			if (!is_spare(NULL, path))
				(void) zero_label(path);
			return (0);
		}

		if (realpath(path, devpath) == NULL) {
			ret = errno;
			(void) fprintf(stderr,
			    gettext("cannot resolve path '%s'\n"), path);
			return (ret);
		}

		/*
		 * Remove any previously existing symlink from a udev path to
		 * the device before labeling the disk.  This ensures that
		 * only newly created links are used.  Otherwise there is a
		 * window between when udev deletes and recreates the link
		 * during which access attempts will fail with ENOENT.
		 */
		strlcpy(udevpath, path, MAXPATHLEN);
		(void) zfs_append_partition(udevpath, MAXPATHLEN);

		fd = open(devpath, O_RDWR|O_EXCL);
		if (fd == -1) {
			if (errno == EBUSY)
				is_exclusive = 1;
#ifdef __FreeBSD__
			if (errno == EPERM)
				is_exclusive = 1;
#endif
		} else {
			(void) close(fd);
		}

		/*
		 * If the partition exists, contains a valid spare label,
		 * and is opened exclusively there is no need to partition
		 * it.  Hot spares have already been partitioned and are
		 * held open exclusively by the kernel as a safety measure.
		 *
		 * If the provided path is for a /dev/disk/ device its
		 * symbolic link will be removed, partition table created,
		 * and then block until udev creates the new link.
		 */
		if (!is_exclusive && !is_spare(NULL, udevpath)) {
			char *devnode = strrchr(devpath, '/') + 1;
			char **lines = NULL;
			int lines_cnt = 0;

			ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT));
			if (ret == 0) {
				ret = lstat64(udevpath, &statbuf);
				if (ret == 0 && S_ISLNK(statbuf.st_mode))
					(void) unlink(udevpath);
			}

			/*
			 * When labeling a pool the raw device node name
			 * is provided as it appears under /dev/.
			 *
			 * Note that 'zhp' will be NULL when we're creating a
			 * pool.
			 */
			if (zpool_prepare_and_label_disk(g_zfs, zhp, devnode,
			    nv, zhp == NULL ? "create" :
			    replacing ? "replace" : "add", &lines,
			    &lines_cnt) != 0) {
				(void) fprintf(stderr,
				    gettext(
				    "Error preparing/labeling disk.\n"));
				if (lines_cnt > 0) {
					(void) fprintf(stderr,
					    gettext("zfs_prepare_disk output:\n"));
					lines_to_stderr(lines, lines_cnt);
				}

				libzfs_free_str_array(lines, lines_cnt);
				return (-1);
			}
			libzfs_free_str_array(lines, lines_cnt);

			/*
			 * Wait for udev to signal the device is available
			 * by the provided path.
			 */
			ret = zpool_label_disk_wait(udevpath, DISK_LABEL_WAIT);
			if (ret) {
				(void) fprintf(stderr,
				    gettext("missing link: %s was "
				    "partitioned but %s is missing\n"),
				    devnode, udevpath);
				return (ret);
			}

			ret = zero_label(udevpath);
			if (ret)
				return (ret);
		}

		/*
		 * Update the path to refer to the partition.  The presence of
		 * the 'whole_disk' field indicates to the CLI that we should
		 * chop off the partition number when displaying the device in
		 * future output.
		 */
		verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, udevpath) == 0);

		/*
		 * Update device id strings for whole disks (Linux only)
		 */
		update_vdev_config_dev_strs(nv);

		return (0);
	}

	/* Interior vdev: recurse into children, spares and l2cache devices. */
	for (c = 0; c < children; c++)
		if ((ret = make_disks(zhp, child[c], replacing)) != 0)
			return (ret);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0)
		for (c = 0; c < children; c++)
			if ((ret = make_disks(zhp, child[c], replacing)) != 0)
				return (ret);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0)
		for (c = 0; c < children; c++)
			if ((ret = make_disks(zhp, child[c], replacing)) != 0)
				return (ret);

	return (0);
}

/*
 * Go through and find any devices that are in use.  We rely on libdiskmgt for
 * the majority of this task.
*/
static boolean_t
is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
    boolean_t replacing, boolean_t isspare)
{
	nvlist_t **child;
	uint_t c, children;
	char *type, *path;
	int ret = 0;
	char buf[MAXPATHLEN];
	uint64_t wholedisk = B_FALSE;
	boolean_t anyinuse = B_FALSE;

	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);

	/* No children array means 'nv' is a leaf vdev. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {

		verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
		if (strcmp(type, VDEV_TYPE_DISK) == 0)
			verify(!nvlist_lookup_uint64(nv,
			    ZPOOL_CONFIG_WHOLE_DISK, &wholedisk));

		/*
		 * As a generic check, we look to see if this is a replace of a
		 * hot spare within the same pool.  If so, we allow it
		 * regardless of what libblkid or zpool_in_use() says.
		 */
		if (replacing) {
			(void) strlcpy(buf, path, sizeof (buf));
			if (wholedisk) {
				ret = zfs_append_partition(buf, sizeof (buf));
				/*
				 * Failure is reported as "in use"; this is a
				 * boolean_t function, so say so explicitly
				 * rather than leaking the -1 error code.
				 */
				if (ret == -1)
					return (B_TRUE);
			}

			if (is_spare(config, buf))
				return (B_FALSE);
		}

		if (strcmp(type, VDEV_TYPE_DISK) == 0)
			ret = check_device(path, force, isspare, wholedisk);

		else if (strcmp(type, VDEV_TYPE_FILE) == 0)
			ret = check_file(path, force, isspare);

		return (ret != 0);
	}

	/*
	 * Interior vdev: visit every child, spare and l2cache device.  The
	 * loops deliberately do not stop at the first hit so that each
	 * device gets checked.
	 */
	for (c = 0; c < children; c++)
		if (is_device_in_use(config, child[c], force, replacing,
		    B_FALSE))
			anyinuse = B_TRUE;

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0)
		for (c = 0; c < children; c++)
			if (is_device_in_use(config, child[c], force, replacing,
			    B_TRUE))
				anyinuse = B_TRUE;

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0)
		for (c = 0; c < children; c++)
			if (is_device_in_use(config, child[c], force, replacing,
			    B_FALSE))
				anyinuse = B_TRUE;

	return (anyinuse);
}

/*
 * Returns the parity level extracted from a raidz or draid type.
 * If the parity cannot be determined zero is returned.
*/
static int
get_parity(const char *type)
{
	const size_t raidz_len = strlen(VDEV_TYPE_RAIDZ);
	const size_t draid_len = strlen(VDEV_TYPE_DRAID);
	const char *digits;
	char *stop;
	long level;

	if (strncmp(type, VDEV_TYPE_RAIDZ, raidz_len) == 0) {
		digits = type + raidz_len;

		/* a bare "raidz" means single parity */
		if (digits[0] == '\0')
			return (1);

		/* a leading zero is never accepted */
		if (digits[0] == '0')
			return (0);

		/* the number must run to the end of the string */
		errno = 0;
		level = strtol(digits, &stop, 10);
		if (errno != 0 || *stop != '\0')
			return (0);
		if (level < 1 || level > VDEV_RAIDZ_MAXPARITY)
			return (0);

		return ((int)level);
	}

	if (strncmp(type, VDEV_TYPE_DRAID, draid_len) == 0) {
		digits = type + draid_len;

		/* "draid" or "draid:..." means single parity */
		if (digits[0] == '\0' || digits[0] == ':')
			return (1);

		/* a leading zero is never accepted */
		if (digits[0] == '0')
			return (0);

		/* the number may only be followed by the end or a ':' */
		errno = 0;
		level = strtol(digits, &stop, 10);
		if (errno != 0 || level < 1 || level > VDEV_DRAID_MAXPARITY)
			return (0);
		if (*stop != '\0' && *stop != ':')
			return (0);

		return ((int)level);
	}

	/* neither a raidz nor a draid type */
	return (0);
}

/*
 * Assign the minimum and maximum number of devices allowed for
 * the specified type.  On error NULL is returned, otherwise the
 * type prefix is returned (raidz, mirror, etc).
*/
static const char *
is_grouping(const char *type, int *mindev, int *maxdev)
{
	boolean_t raidz_like, draid_like;
	const char *canonical;
	int min_required;

	raidz_like = (strncmp(type, VDEV_TYPE_RAIDZ,
	    strlen(VDEV_TYPE_RAIDZ)) == 0);
	draid_like = (strncmp(type, VDEV_TYPE_DRAID,
	    strlen(VDEV_TYPE_DRAID)) == 0);

	if (raidz_like || draid_like) {
		/* Parity groups need one device more than their parity. */
		int nparity = get_parity(type);

		if (nparity == 0)
			return (NULL);
		if (mindev != NULL)
			*mindev = nparity + 1;
		if (maxdev != NULL)
			*maxdev = 255;

		return (raidz_like ? VDEV_TYPE_RAIDZ : VDEV_TYPE_DRAID);
	}

	/* Every remaining keyword has no upper bound on its devices. */
	if (maxdev != NULL)
		*maxdev = INT_MAX;

	if (strcmp(type, "mirror") == 0) {
		min_required = 2;
		canonical = VDEV_TYPE_MIRROR;
	} else if (strcmp(type, "spare") == 0) {
		min_required = 1;
		canonical = VDEV_TYPE_SPARE;
	} else if (strcmp(type, "log") == 0) {
		min_required = 1;
		canonical = VDEV_TYPE_LOG;
	} else if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0 ||
	    strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
		/* The allocation classes hand back the caller's own string. */
		min_required = 1;
		canonical = type;
	} else if (strcmp(type, "cache") == 0) {
		min_required = 1;
		canonical = VDEV_TYPE_L2CACHE;
	} else {
		return (NULL);
	}

	if (mindev != NULL)
		*mindev = min_required;

	return (canonical);
}

/*
 * Extract the configuration parameters encoded in the dRAID type and
 * use them to generate a dRAID configuration.  The expected format is:
 *
 * draid[<parity>][:<data>d][:<children>c][:<spares>s]
 *
 * The intent is to be able to generate a good configuration when no
 * additional information is provided.  The only mandatory component
 * of the 'type' is the 'draid' prefix.  If a value is not provided
 * then reasonable defaults are used.  The optional components may
 * appear in any order but the d/s/c suffix is required.
* * Valid inputs: * - data: number of data devices per group (1-255) * - parity: number of parity blocks per group (1-3) * - spares: number of distributed spare (0-100) * - children: total number of devices (1-255) * * Examples: * - zpool create tank draid * - zpool create tank draid2:8d:51c:2s */ static int draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children) { uint64_t nparity = 1; uint64_t nspares = 0; uint64_t ndata = UINT64_MAX; uint64_t ngroups = 1; long value; if (strncmp(type, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) != 0) return (EINVAL); nparity = (uint64_t)get_parity(type); if (nparity == 0) return (EINVAL); char *p = (char *)type; while ((p = strchr(p, ':')) != NULL) { char *end; p = p + 1; errno = 0; if (!isdigit(p[0])) { (void) fprintf(stderr, gettext("invalid dRAID " "syntax; expected [:] not '%s'\n"), type); return (EINVAL); } /* Expected non-zero value with c/d/s suffix */ value = strtol(p, &end, 10); char suffix = tolower(*end); if (errno != 0 || (suffix != 'c' && suffix != 'd' && suffix != 's')) { (void) fprintf(stderr, gettext("invalid dRAID " "syntax; expected [:] not '%s'\n"), type); return (EINVAL); } if (suffix == 'c') { if ((uint64_t)value != children) { fprintf(stderr, gettext("invalid number of dRAID children; " "%llu required but %llu provided\n"), (u_longlong_t)value, (u_longlong_t)children); return (EINVAL); } } else if (suffix == 'd') { ndata = (uint64_t)value; } else if (suffix == 's') { nspares = (uint64_t)value; } else { verify(0); /* Unreachable */ } } /* * When a specific number of data disks is not provided limit a * redundancy group to 8 data disks. This value was selected to * provide a reasonable tradeoff between capacity and performance. 
*/ if (ndata == UINT64_MAX) { if (children > nspares + nparity) { ndata = MIN(children - nspares - nparity, 8); } else { fprintf(stderr, gettext("request number of " "distributed spares %llu and parity level %llu\n" "leaves no disks available for data\n"), (u_longlong_t)nspares, (u_longlong_t)nparity); return (EINVAL); } } /* Verify the maximum allowed group size is never exceeded. */ if (ndata == 0 || (ndata + nparity > children - nspares)) { fprintf(stderr, gettext("requested number of dRAID data " "disks per group %llu is too high,\nat most %llu disks " "are available for data\n"), (u_longlong_t)ndata, (u_longlong_t)(children - nspares - nparity)); return (EINVAL); } if (nparity == 0 || nparity > VDEV_DRAID_MAXPARITY) { fprintf(stderr, gettext("invalid dRAID parity level %llu; must be " "between 1 and %d\n"), (u_longlong_t)nparity, VDEV_DRAID_MAXPARITY); return (EINVAL); } /* * Verify the requested number of spares can be satisfied. * An arbitrary limit of 100 distributed spares is applied. */ if (nspares > 100 || nspares > (children - (ndata + nparity))) { fprintf(stderr, gettext("invalid number of dRAID spares %llu; additional " "disks would be required\n"), (u_longlong_t)nspares); return (EINVAL); } /* Verify the requested number children is sufficient. */ if (children < (ndata + nparity + nspares)) { fprintf(stderr, gettext("%llu disks were provided, but at " "least %llu disks are required for this config\n"), (u_longlong_t)children, (u_longlong_t)(ndata + nparity + nspares)); } if (children > VDEV_DRAID_MAX_CHILDREN) { fprintf(stderr, gettext("%llu disks were provided, but " "dRAID only supports up to %u disks"), (u_longlong_t)children, VDEV_DRAID_MAX_CHILDREN); } /* * Calculate the minimum number of groups required to fill a slice. * This is the LCM of the stripe width (ndata + nparity) and the * number of data drives (children - nspares). */ while (ngroups * (ndata + nparity) % (children - nspares) != 0) ngroups++; /* Store the basic dRAID configuration. 
*/ fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, nparity); fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, ndata); fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, nspares); fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups); return (0); } /* * Construct a syntactically valid vdev specification, * and ensure that all devices and files exist and can be opened. * Note: we don't bother freeing anything in the error paths * because the program is just going to exit anyway. */ static nvlist_t * construct_spec(nvlist_t *props, int argc, char **argv) { nvlist_t *nvroot, *nv, **top, **spares, **l2cache; int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache; const char *type, *fulltype; boolean_t is_log, is_special, is_dedup, is_spare; boolean_t seen_logs; top = NULL; toplevels = 0; spares = NULL; l2cache = NULL; nspares = 0; nlogs = 0; nl2cache = 0; is_log = is_special = is_dedup = is_spare = B_FALSE; seen_logs = B_FALSE; nvroot = NULL; while (argc > 0) { fulltype = argv[0]; nv = NULL; /* * If it's a mirror, raidz, or draid the subsequent arguments * are its leaves -- until we encounter the next mirror, * raidz or draid. */ if ((type = is_grouping(fulltype, &mindev, &maxdev)) != NULL) { nvlist_t **child = NULL; int c, children = 0; if (strcmp(type, VDEV_TYPE_SPARE) == 0) { if (spares != NULL) { (void) fprintf(stderr, gettext("invalid vdev " "specification: 'spare' can be " "specified only once\n")); goto spec_out; } is_spare = B_TRUE; is_log = is_special = is_dedup = B_FALSE; } if (strcmp(type, VDEV_TYPE_LOG) == 0) { if (seen_logs) { (void) fprintf(stderr, gettext("invalid vdev " "specification: 'log' can be " "specified only once\n")); goto spec_out; } seen_logs = B_TRUE; is_log = B_TRUE; is_special = is_dedup = is_spare = B_FALSE; argc--; argv++; /* * A log is not a real grouping device. * We just set is_log and continue. 
*/ continue; } if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) { is_special = B_TRUE; is_log = is_dedup = is_spare = B_FALSE; argc--; argv++; continue; } if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) { is_dedup = B_TRUE; is_log = is_special = is_spare = B_FALSE; argc--; argv++; continue; } if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { if (l2cache != NULL) { (void) fprintf(stderr, gettext("invalid vdev " "specification: 'cache' can be " "specified only once\n")); goto spec_out; } is_log = is_special = B_FALSE; is_dedup = is_spare = B_FALSE; } if (is_log || is_special || is_dedup) { if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { (void) fprintf(stderr, gettext("invalid vdev " "specification: unsupported '%s' " "device: %s\n"), is_log ? "log" : "special", type); goto spec_out; } nlogs++; } for (c = 1; c < argc; c++) { if (is_grouping(argv[c], NULL, NULL) != NULL) break; children++; child = realloc(child, children * sizeof (nvlist_t *)); if (child == NULL) zpool_no_memory(); if ((nv = make_leaf_vdev(props, argv[c], !(is_log || is_special || is_dedup || is_spare))) == NULL) { for (c = 0; c < children - 1; c++) nvlist_free(child[c]); free(child); goto spec_out; } child[children - 1] = nv; } if (children < mindev) { (void) fprintf(stderr, gettext("invalid vdev " "specification: %s requires at least %d " "devices\n"), argv[0], mindev); for (c = 0; c < children; c++) nvlist_free(child[c]); free(child); goto spec_out; } if (children > maxdev) { (void) fprintf(stderr, gettext("invalid vdev " "specification: %s supports no more than " "%d devices\n"), argv[0], maxdev); for (c = 0; c < children; c++) nvlist_free(child[c]); free(child); goto spec_out; } argc -= c; argv += c; if (strcmp(type, VDEV_TYPE_SPARE) == 0) { spares = child; nspares = children; continue; } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { l2cache = child; nl2cache = children; continue; } else { /* create a top-level vdev with children */ verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) == 0); 
verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, type) == 0); verify(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, is_log) == 0); if (is_log) { verify(nvlist_add_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_LOG) == 0); } if (is_special) { verify(nvlist_add_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_SPECIAL) == 0); } if (is_dedup) { verify(nvlist_add_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_DEDUP) == 0); } if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { verify(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, mindev - 1) == 0); } if (strcmp(type, VDEV_TYPE_DRAID) == 0) { if (draid_config_by_type(nv, fulltype, children) != 0) { for (c = 0; c < children; c++) nvlist_free(child[c]); free(child); goto spec_out; } } verify(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, child, children) == 0); for (c = 0; c < children; c++) nvlist_free(child[c]); free(child); } } else { /* * We have a device. Pass off to make_leaf_vdev() to * construct the appropriate nvlist describing the vdev. 
*/ if ((nv = make_leaf_vdev(props, argv[0], !(is_log || is_special || is_dedup || is_spare))) == NULL) goto spec_out; verify(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, is_log) == 0); if (is_log) { verify(nvlist_add_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_LOG) == 0); nlogs++; } if (is_special) { verify(nvlist_add_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_SPECIAL) == 0); } if (is_dedup) { verify(nvlist_add_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_DEDUP) == 0); } argc--; argv++; } toplevels++; top = realloc(top, toplevels * sizeof (nvlist_t *)); if (top == NULL) zpool_no_memory(); top[toplevels - 1] = nv; } if (toplevels == 0 && nspares == 0 && nl2cache == 0) { (void) fprintf(stderr, gettext("invalid vdev " "specification: at least one toplevel vdev must be " "specified\n")); goto spec_out; } if (seen_logs && nlogs == 0) { (void) fprintf(stderr, gettext("invalid vdev specification: " "log requires at least 1 device\n")); goto spec_out; } /* * Finally, create nvroot and add all top-level vdevs to it. 
*/ verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, top, toplevels) == 0); if (nspares != 0) verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); if (nl2cache != 0) verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spec_out: for (t = 0; t < toplevels; t++) nvlist_free(top[t]); for (t = 0; t < nspares; t++) nvlist_free(spares[t]); for (t = 0; t < nl2cache; t++) nvlist_free(l2cache[t]); free(spares); free(l2cache); free(top); return (nvroot); } /* * Build the vdev tree for the pool to be split off. If devices are named * (argc > 0), construct a spec from them, run make_disks() on it unless * flags.dryrun is set, and reject any child whose path is_grouping() * recognizes. The tree is then handed to zpool_vdev_split(). Returns the * resulting root nvlist, or NULL on failure; newroot is freed on every * failure path after it has been built. */ nvlist_t * split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, splitflags_t flags, int argc, char **argv) { nvlist_t *newroot = NULL, **child; uint_t c, children; if (argc > 0) { if ((newroot = construct_spec(props, argc, argv)) == NULL) { (void) fprintf(stderr, gettext("Unable to build a " "pool from the specified devices\n")); return (NULL); } - if (!flags.dryrun && make_disks(zhp, newroot) != 0) { + if (!flags.dryrun && make_disks(zhp, newroot, B_FALSE) != 0) { nvlist_free(newroot); return (NULL); } /* avoid any tricks in the spec */ verify(nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); for (c = 0; c < children; c++) { char *path; const char *type; int min, max; verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_PATH, &path) == 0); if ((type = is_grouping(path, &min, &max)) != NULL) { (void) fprintf(stderr, gettext("Cannot use " "'%s' as a device for splitting\n"), type); nvlist_free(newroot); return (NULL); } } } if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) { nvlist_free(newroot); return (NULL); } return (newroot); } /* * Count the top-level children of nvroot that are neither flagged * ZPOOL_CONFIG_IS_LOG nor carry a ZPOOL_CONFIG_ALLOCATION_BIAS entry. */ static int num_normal_vdevs(nvlist_t *nvroot) { nvlist_t **top; uint_t t, toplevels, normal = 0; verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &top, &toplevels) == 0); for (t = 0; t < toplevels; t++) { 
uint64_t log = B_FALSE; (void) nvlist_lookup_uint64(top[t], ZPOOL_CONFIG_IS_LOG, &log); if (log) continue; if (nvlist_exists(top[t], ZPOOL_CONFIG_ALLOCATION_BIAS)) continue; normal++; } return (normal); } /* * Get and validate the contents of the given vdev specification. This ensures * that the nvlist returned is well-formed, that all the devices exist, and that * they are not currently in use by any other known consumer. The 'zhp' * parameter is the handle of the existing pool when adding devices to it; its * current configuration is used to perform additional checks, such as changing * the replication level of the pool. It can be 'NULL' to indicate that this is * a new pool. The 'force' flag controls whether devices should be forcefully * added, even if they appear in use. Replication is only checked when * 'check_rep' is set, and whole disks are only labeled when 'dryrun' is not * set. Returns the new root nvlist, or NULL on failure. */ nvlist_t * make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, boolean_t replacing, boolean_t dryrun, int argc, char **argv) { nvlist_t *newroot; nvlist_t *poolconfig = NULL; is_force = force; /* * Construct the vdev specification. If this is successful, we know * that we have a valid specification, and that all devices can be * opened. */ if ((newroot = construct_spec(props, argc, argv)) == NULL) return (NULL); if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) { nvlist_free(newroot); return (NULL); } /* * Validate each device to make sure that it's not shared with another * subsystem. We do this even if 'force' is set, because there are some * uses (such as a dedicated dump device) that even '-f' cannot * override. */ if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) { nvlist_free(newroot); return (NULL); } /* * Check the replication level of the given vdevs and report any errors * found. We include the existing pool spec, if any, as we need to * catch changes against the existing replication level. 
*/ if (check_rep && check_replication(poolconfig, newroot) != 0) { nvlist_free(newroot); return (NULL); } /* * On pool create the new vdev spec must have one normal vdev. */ if (poolconfig == NULL && num_normal_vdevs(newroot) == 0) { vdev_error(gettext("at least one general top-level vdev must " "be specified\n")); nvlist_free(newroot); return (NULL); } /* * Run through the vdev specification and label any whole disks found. */ - if (!dryrun && make_disks(zhp, newroot) != 0) { + if (!dryrun && make_disks(zhp, newroot, replacing) != 0) { nvlist_free(newroot); return (NULL); } return (newroot); } diff --git a/config/Rules.am b/config/Rules.am index 3b24e3630102..8c484d18c2a0 100644 --- a/config/Rules.am +++ b/config/Rules.am @@ -1,64 +1,65 @@ # # Default build rules for all user space components, every Makefile.am # should include these rules and override or extend them as needed. # PHONY = DEFAULT_INCLUDES = \ -include $(top_builddir)/zfs_config.h \ -I$(top_builddir)/include \ -I$(top_srcdir)/include \ -I$(top_srcdir)/module/icp/include \ -I$(top_srcdir)/lib/libspl/include if BUILD_LINUX DEFAULT_INCLUDES += \ -I$(top_srcdir)/lib/libspl/include/os/linux endif if BUILD_FREEBSD DEFAULT_INCLUDES += \ -I$(top_srcdir)/lib/libspl/include/os/freebsd endif AM_LIBTOOLFLAGS = --silent AM_CFLAGS = -std=gnu99 -Wall -Wstrict-prototypes -Wmissing-prototypes AM_CFLAGS += -fno-strict-aliasing AM_CFLAGS += $(NO_OMIT_FRAME_POINTER) AM_CFLAGS += $(IMPLICIT_FALLTHROUGH) AM_CFLAGS += $(DEBUG_CFLAGS) AM_CFLAGS += $(ASAN_CFLAGS) AM_CFLAGS += $(CODE_COVERAGE_CFLAGS) $(NO_FORMAT_ZERO_LENGTH) if BUILD_FREEBSD AM_CFLAGS += -fPIC -Werror -Wno-unknown-pragmas -Wno-enum-conversion AM_CFLAGS += -include $(top_srcdir)/include/os/freebsd/spl/sys/ccompile.h AM_CFLAGS += -I/usr/include -I/usr/local/include endif AM_CPPFLAGS = -D_GNU_SOURCE AM_CPPFLAGS += -D_REENTRANT AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64 AM_CPPFLAGS += -D_LARGEFILE64_SOURCE AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\" +AM_CPPFLAGS += 
-DZFSEXECDIR=\"$(zfsexecdir)\" AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\" AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\" AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\" AM_CPPFLAGS += -DPKGDATADIR=\"$(pkgdatadir)\" AM_CPPFLAGS += $(DEBUG_CPPFLAGS) AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS) if BUILD_LINUX AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-linux-user\" endif if BUILD_FREEBSD AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-freebsd-user\" endif AM_LDFLAGS = $(DEBUG_LDFLAGS) AM_LDFLAGS += $(ASAN_LDFLAGS) if BUILD_FREEBSD AM_LDFLAGS += -fstack-protector-strong -shared AM_LDFLAGS += -Wl,-x -Wl,--fatal-warnings -Wl,--warn-shared-textrel AM_LDFLAGS += -lm endif diff --git a/include/libzfs.h b/include/libzfs.h index 214a188f9474..c7ebc52fe9fb 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -1,959 +1,968 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright Joyent, Inc. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2016, Intel Corporation. * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. 
* Copyright (c) 2021, Colm Buckley */ #ifndef _LIBZFS_H #define _LIBZFS_H #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * Miscellaneous ZFS constants */ #define ZFS_MAXPROPLEN MAXPATHLEN #define ZPOOL_MAXPROPLEN MAXPATHLEN /* * libzfs errors. EZFS_SUCCESS is 0; every other code follows consecutively * from EZFS_NOMEM (2000), so inserting an enumerator renumbers those after it. */ typedef enum zfs_error { EZFS_SUCCESS = 0, /* no error -- success */ EZFS_NOMEM = 2000, /* out of memory */ EZFS_BADPROP, /* invalid property value */ EZFS_PROPREADONLY, /* cannot set readonly property */ EZFS_PROPTYPE, /* property does not apply to dataset type */ EZFS_PROPNONINHERIT, /* property is not inheritable */ EZFS_PROPSPACE, /* bad quota or reservation */ EZFS_BADTYPE, /* dataset is not of appropriate type */ EZFS_BUSY, /* pool or dataset is busy */ EZFS_EXISTS, /* pool or dataset already exists */ EZFS_NOENT, /* no such pool or dataset */ EZFS_BADSTREAM, /* bad backup stream */ EZFS_DSREADONLY, /* dataset is readonly */ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ EZFS_INVALIDNAME, /* invalid dataset name */ EZFS_BADRESTORE, /* unable to restore to destination */ EZFS_BADBACKUP, /* backup failed */ EZFS_BADTARGET, /* bad attach/detach/replace target */ EZFS_NODEVICE, /* no such device in pool */ EZFS_BADDEV, /* invalid device to add */ EZFS_NOREPLICAS, /* no valid replicas */ EZFS_RESILVERING, /* resilvering (healing reconstruction) */ EZFS_BADVERSION, /* unsupported version */ EZFS_POOLUNAVAIL, /* pool is currently unavailable */ EZFS_DEVOVERFLOW, /* too many devices in one vdev */ EZFS_BADPATH, /* must be an absolute path */ EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ EZFS_ZONED, /* used improperly in local zone */ EZFS_MOUNTFAILED, /* failed to mount dataset */ EZFS_UMOUNTFAILED, /* failed to unmount dataset */ EZFS_UNSHARENFSFAILED, /* failed to unshare over nfs */ EZFS_SHARENFSFAILED, /* failed to share over nfs */ EZFS_PERM, /* permission denied */ EZFS_NOSPC, /* out of space */ EZFS_FAULT, 
/* bad address */ EZFS_IO, /* I/O error */ EZFS_INTR, /* signal received */ EZFS_ISSPARE, /* device is a hot spare */ EZFS_INVALCONFIG, /* invalid vdev configuration */ EZFS_RECURSIVE, /* recursive dependency */ EZFS_NOHISTORY, /* no history object */ EZFS_POOLPROPS, /* couldn't retrieve pool props */ EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ EZFS_NAMETOOLONG, /* dataset name is too long */ EZFS_OPENFAILED, /* open of device failed */ EZFS_NOCAP, /* couldn't get capacity */ EZFS_LABELFAILED, /* write of label failed */ EZFS_BADWHO, /* invalid permission who */ EZFS_BADPERM, /* invalid permission */ EZFS_BADPERMSET, /* invalid permission set name */ EZFS_NODELEGATION, /* delegated administration is disabled */ EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ EZFS_SHARESMBFAILED, /* failed to share over smb */ EZFS_BADCACHE, /* bad cache file */ EZFS_ISL2CACHE, /* device is for the level 2 ARC */ EZFS_VDEVNOTSUP, /* unsupported vdev type */ EZFS_NOTSUP, /* ops not supported on this dataset */ EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ EZFS_REFTAG_RELE, /* snapshot release: tag not found */ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ EZFS_PIPEFAILED, /* pipe create failed */ EZFS_THREADCREATEFAILED, /* thread create failed */ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ EZFS_SCRUBBING, /* currently scrubbing */ EZFS_NO_SCRUB, /* no active scrub */ EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_SCRUB_PAUSED, /* scrub currently paused */ EZFS_ACTIVE_POOL, /* pool is imported on a different system */ EZFS_CRYPTOFAILED, /* failed to setup encryption */ EZFS_NO_PENDING, /* cannot cancel, no operation is pending */ 
EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */ EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */ EZFS_NO_CHECKPOINT, /* pool has no checkpoint */ EZFS_DEVRM_IN_PROGRESS, /* a device is currently being removed */ EZFS_VDEV_TOO_BIG, /* a device is too big to be used */ EZFS_IOC_NOTSUPPORTED, /* operation not supported by zfs module */ EZFS_TOOMANY, /* argument list too long */ EZFS_INITIALIZING, /* currently initializing */ EZFS_NO_INITIALIZE, /* no active initialize */ EZFS_WRONG_PARENT, /* invalid parent dataset (e.g. ZVOL) */ EZFS_TRIMMING, /* currently trimming */ EZFS_NO_TRIM, /* no active trim */ EZFS_TRIM_NOTSUP, /* device does not support trim */ EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */ EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */ EZFS_REBUILDING, /* resilvering (sequential reconstruction) */ EZFS_CKSUM, /* insufficient replicas */ EZFS_UNKNOWN } zfs_error_t; /* * The following data structures are all part * of the zfs_allow_t data structure which is * used for printing 'allow' permissions. * It is a linked list of zfs_allow_t's which * then contain avl tree's for user/group/sets/... * and each one of the entries in those trees have * avl tree's for the permissions they belong to and * whether they are local,descendent or local+descendent * permissions. The AVL trees are used primarily for * sorting purposes, but also so that we can quickly find * a given user and or permission. 
*/ typedef struct zfs_perm_node { avl_node_t z_node; char z_pname[MAXPATHLEN]; } zfs_perm_node_t; typedef struct zfs_allow_node { avl_node_t z_node; char z_key[MAXPATHLEN]; /* name, such as joe */ avl_tree_t z_localdescend; /* local+descendent perms */ avl_tree_t z_local; /* local permissions */ avl_tree_t z_descend; /* descendent permissions */ } zfs_allow_node_t; typedef struct zfs_allow { struct zfs_allow *z_next; char z_setpoint[MAXPATHLEN]; avl_tree_t z_sets; avl_tree_t z_crperms; avl_tree_t z_user; avl_tree_t z_group; avl_tree_t z_everyone; } zfs_allow_t; /* * Basic handle types */ typedef struct zfs_handle zfs_handle_t; typedef struct zpool_handle zpool_handle_t; typedef struct libzfs_handle libzfs_handle_t; extern int zpool_wait(zpool_handle_t *, zpool_wait_activity_t); extern int zpool_wait_status(zpool_handle_t *, zpool_wait_activity_t, boolean_t *, boolean_t *); /* * Library initialization */ extern libzfs_handle_t *libzfs_init(void); extern void libzfs_fini(libzfs_handle_t *); extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); extern void zfs_save_arguments(int argc, char **, char *, int); extern int zpool_log_history(libzfs_handle_t *, const char *); extern int libzfs_errno(libzfs_handle_t *); extern const char *libzfs_error_init(int); extern const char *libzfs_error_action(libzfs_handle_t *); extern const char *libzfs_error_description(libzfs_handle_t *); extern int zfs_standard_error(libzfs_handle_t *, int, const char *); extern void libzfs_mnttab_init(libzfs_handle_t *); extern void libzfs_mnttab_fini(libzfs_handle_t *); extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, struct mnttab *); extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, const char *, const char *); extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); /* 
* Basic handle functions */ extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); extern void zpool_close(zpool_handle_t *); extern const char *zpool_get_name(zpool_handle_t *); extern int zpool_get_state(zpool_handle_t *); extern const char *zpool_state_to_name(vdev_state_t, vdev_aux_t); extern const char *zpool_pool_state_to_name(pool_state_t); extern void zpool_free_handles(libzfs_handle_t *); /* * Iterate over all active pools in the system. */ typedef int (*zpool_iter_f)(zpool_handle_t *, void *); extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); extern boolean_t zpool_skip_pool(const char *); /* * Functions to create and destroy pools */ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, nvlist_t *, nvlist_t *); extern int zpool_destroy(zpool_handle_t *, const char *); extern int zpool_add(zpool_handle_t *, nvlist_t *); typedef struct splitflags { /* do not split, but return the config that would be split off */ unsigned int dryrun : 1; /* after splitting, import the pool */ unsigned int import : 1; int name_flags; } splitflags_t; typedef struct trimflags { /* requested vdevs are for the entire pool */ boolean_t fullpool; /* request a secure trim, requires support from device */ boolean_t secure; /* after starting trim, block until trim completes */ boolean_t wait; /* trim at the requested rate in bytes/second */ uint64_t rate; } trimflags_t; /* * Functions to manipulate pool and vdev state */ extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); extern int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); extern int zpool_trim(zpool_handle_t *, pool_trim_func_t, nvlist_t *, trimflags_t *); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_reguid(zpool_handle_t *); extern 
int zpool_reopen_one(zpool_handle_t *, void *); extern int zpool_sync_one(zpool_handle_t *, void *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *, nvlist_t *, int, boolean_t); extern int zpool_vdev_detach(zpool_handle_t *, const char *); extern int zpool_vdev_remove(zpool_handle_t *, const char *); extern int zpool_vdev_remove_cancel(zpool_handle_t *); extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *); extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, splitflags_t); /* * NOTE(review): _LIBZFS_H appears to be only the include-guard macro that * this header defines with an empty body, so the prefix below expands to * nothing, while the neighbouring prototypes use "extern" -- confirm * whether "extern" was intended here. */ _LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *); extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *); +extern int zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv, + const char *prepare_str, char **lines[], int *lines_cnt); +extern int zpool_prepare_and_label_disk(libzfs_handle_t *hdl, + zpool_handle_t *, const char *, nvlist_t *vdev_nv, const char *prepare_str, + char **lines[], int *lines_cnt); +extern char ** zpool_vdev_script_alloc_env(const char *pool_name, + const char *vdev_path, const char *vdev_upath, + const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val); +extern void zpool_vdev_script_free_env(char **env); extern uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path); const char *zpool_get_state_str(zpool_handle_t *); /* * Functions to manage 
pool properties */ extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, size_t proplen, zprop_source_t *, boolean_t literal); extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, zprop_source_t *); extern int zpool_props_refresh(zpool_handle_t *); extern const char *zpool_prop_to_name(zpool_prop_t); extern const char *zpool_prop_values(zpool_prop_t); /* * Pool health statistics. */ typedef enum { /* * The following correspond to faults as defined in the (fault.fs.zfs.*) * event namespace. Each is associated with a corresponding message ID. * This must be kept in sync with the zfs_msgid_table in * lib/libzfs/libzfs_status.c. */ ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ ZPOOL_STATUS_HOSTID_ACTIVE, /* currently active on another system */ ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ ZPOOL_STATUS_ERRATA, /* informational errata available */ /* * If the pool has unsupported features but can still be opened in * read-only mode, its status is 
ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the * pool has unsupported features but cannot be opened at all, its * status is ZPOOL_STATUS_UNSUP_FEAT_READ. */ ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */ ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */ /* * These faults have no corresponding message ID. At the time we are * checking the status, the original reason for the FMA fault (I/O or * checksum errors) has been lost. */ ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ /* * The following are not faults per se, but still an error possibly * requiring administrative attention. There is no corresponding * message ID. */ ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */ ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */ ZPOOL_STATUS_RESILVERING, /* device being resilvered */ ZPOOL_STATUS_OFFLINE_DEV, /* device offline */ ZPOOL_STATUS_REMOVED_DEV, /* removed device */ ZPOOL_STATUS_REBUILDING, /* device being rebuilt */ ZPOOL_STATUS_REBUILD_SCRUB, /* recommend scrubbing the pool */ ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */ ZPOOL_STATUS_COMPATIBILITY_ERR, /* bad 'compatibility' property */ ZPOOL_STATUS_INCOMPATIBLE_FEAT, /* feature set outside compatibility */ /* * Finally, the following indicates a healthy pool. */ ZPOOL_STATUS_OK } zpool_status_t; extern zpool_status_t zpool_get_status(zpool_handle_t *, char **, zpool_errata_t *); extern zpool_status_t zpool_import_status(nvlist_t *, char **, zpool_errata_t *); /* * Statistics and configuration functions. 
*/ extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); extern nvlist_t *zpool_get_features(zpool_handle_t *); extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); /* * Import and export functions */ extern int zpool_export(zpool_handle_t *, boolean_t, const char *); extern int zpool_export_force(zpool_handle_t *, const char *); extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char *altroot); extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, nvlist_t *, int); extern void zpool_print_unsup_feat(nvlist_t *config); /* * Miscellaneous pool functions */ struct zfs_cmd; extern const char *zfs_history_event_names[]; typedef enum { VDEV_NAME_PATH = 1 << 0, VDEV_NAME_GUID = 1 << 1, VDEV_NAME_FOLLOW_LINKS = 1 << 2, VDEV_NAME_TYPE_ID = 1 << 3, } vdev_name_t; extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, int name_flags); extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **, uint64_t *, boolean_t *); extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, unsigned, int); extern int zpool_events_clear(libzfs_handle_t *, int *); extern int zpool_events_seek(libzfs_handle_t *, uint64_t, int); extern void zpool_obj_to_path_ds(zpool_handle_t *, uint64_t, uint64_t, char *, size_t); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, nvlist_t *); extern int zpool_checkpoint(zpool_handle_t *); extern int zpool_discard_checkpoint(zpool_handle_t *); extern boolean_t zpool_is_draid_spare(const char *); /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. 
*/ extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); extern void zfs_close(zfs_handle_t *); extern zfs_type_t zfs_get_type(const zfs_handle_t *); extern const char *zfs_get_name(const zfs_handle_t *); extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); extern const char *zfs_get_pool_name(const zfs_handle_t *); /* * Property management functions. Some functions are shared with the kernel, * and are found in sys/fs/zfs.h. */ /* * zfs dataset property management */ extern const char *zfs_prop_default_string(zfs_prop_t); extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); extern int zfs_prop_set_list(zfs_handle_t *, nvlist_t *); extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zprop_source_t *, char *, size_t, boolean_t); extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, boolean_t); extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zprop_source_t *, char *, size_t); extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, char *buf, size_t len); extern uint64_t getprop_uint64(zfs_handle_t *, 
zfs_prop_t, char **); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); extern int zfs_prop_is_string(zfs_prop_t prop); extern nvlist_t *zfs_get_all_props(zfs_handle_t *); extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t, boolean_t *, boolean_t *); /* * zfs encryption management */ extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *); extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *, boolean_t stdin_available, uint8_t **, uint_t *); extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *, nvlist_t *); extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *); extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *); extern int zfs_crypto_unload_key(zfs_handle_t *); extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t); typedef struct zprop_list { int pl_prop; char *pl_user_prop; struct zprop_list *pl_next; boolean_t pl_all; size_t pl_width; size_t pl_recvd_width; boolean_t pl_fixed; } zprop_list_t; extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t, boolean_t); extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); #define ZFS_MOUNTPOINT_NONE "none" #define ZFS_MOUNTPOINT_LEGACY "legacy" #define ZFS_FEATURE_DISABLED "disabled" #define ZFS_FEATURE_ENABLED "enabled" #define ZFS_FEATURE_ACTIVE "active" #define ZFS_UNSUPPORTED_INACTIVE "inactive" #define ZFS_UNSUPPORTED_READONLY "readonly" /* * zpool property management */ extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **, boolean_t); extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *, size_t); extern const char 
*zpool_prop_default_string(zpool_prop_t); extern uint64_t zpool_prop_default_numeric(zpool_prop_t); extern const char *zpool_prop_column_name(zpool_prop_t); extern boolean_t zpool_prop_align_right(zpool_prop_t); /* * Functions shared by zfs and zpool property management. */ extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, zfs_type_t type); extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, zfs_type_t); extern void zprop_free_list(zprop_list_t *); #define ZFS_GET_NCOLS 5 typedef enum { GET_COL_NONE, GET_COL_NAME, GET_COL_PROPERTY, GET_COL_VALUE, GET_COL_RECVD, GET_COL_SOURCE } zfs_get_column_t; /* * Functions for printing zfs or zpool properties */ typedef struct zprop_get_cbdata { int cb_sources; zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; int cb_colwidths[ZFS_GET_NCOLS + 1]; boolean_t cb_scripted; boolean_t cb_literal; boolean_t cb_first; zprop_list_t *cb_proplist; zfs_type_t cb_type; } zprop_get_cbdata_t; void zprop_print_one_property(const char *, zprop_get_cbdata_t *, const char *, const char *, zprop_source_t, const char *, const char *); /* * Iterator functions. 
*/ typedef int (*zfs_iter_f)(zfs_handle_t *, void *); extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *, uint64_t, uint64_t); extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *, uint64_t, uint64_t); extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); extern int zfs_iter_bookmarks(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_mounted(zfs_handle_t *, zfs_iter_f, void *); typedef struct get_all_cb { zfs_handle_t **cb_handles; size_t cb_alloc; size_t cb_used; } get_all_cb_t; void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t, zfs_iter_f, void *, boolean_t); void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); /* * Functions to create and destroy datasets. 
*/ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, nvlist_t *); extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); typedef struct renameflags { /* recursive rename */ unsigned int recursive : 1; /* don't unmount file systems */ unsigned int nounmount : 1; /* force unmount file systems */ unsigned int forceunmount : 1; } renameflags_t; extern int zfs_rename(zfs_handle_t *, const char *, renameflags_t); typedef struct sendflags { /* Amount of extra information to print. */ int verbosity; /* recursive send (ie, -R) */ boolean_t replicate; /* for recursive send, skip sending missing snapshots */ boolean_t skipmissing; /* for incrementals, do all intermediate snapshots */ boolean_t doall; /* if dataset is a clone, do incremental from its origin */ boolean_t fromorigin; /* field no longer used, maintained for backwards compatibility */ boolean_t pad; /* send properties (ie, -p) */ boolean_t props; /* do not send (no-op, ie. -n) */ boolean_t dryrun; /* parsable verbose output (ie. -P) */ boolean_t parsable; /* show progress (ie. -v) */ boolean_t progress; /* show progress as process title (ie. -V) */ boolean_t progressastitle; /* large blocks (>128K) are permitted */ boolean_t largeblock; /* WRITE_EMBEDDED records of type DATA are permitted */ boolean_t embed_data; /* compressed WRITE records are permitted */ boolean_t compress; /* raw encrypted records are permitted */ boolean_t raw; /* only send received properties (ie. 
-b) */ boolean_t backup; /* include snapshot holds in send stream */ boolean_t holds; /* stream represents a partially received dataset */ boolean_t saved; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t *, const char *); extern int zfs_send_progress(zfs_handle_t *, int, uint64_t *, uint64_t *); extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd, const char *); extern int zfs_send_saved(zfs_handle_t *, sendflags_t *, int, const char *); extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, int); extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(zpool_handle_t *, uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, zfs_userspace_cb_t, void *); extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); typedef struct recvflags { /* print informational messages (ie, -v was specified) */ boolean_t verbose; /* the destination is a prefix, not the exact fs (ie, -d) */ boolean_t isprefix; /* * Only the tail of the sent snapshot path is appended to the * destination to determine the received snapshot name (ie, -e). 
*/ boolean_t istail; /* do not actually do the recv, just check if it would work (ie, -n) */ boolean_t dryrun; /* rollback/destroy filesystems as necessary (eg, -F) */ boolean_t force; /* set "canmount=off" on all modified filesystems */ boolean_t canmountoff; /* * Mark the file systems as "resumable" and do not destroy them if the * receive is interrupted */ boolean_t resumable; /* byteswap flag is used internally; callers need not specify */ boolean_t byteswap; /* do not mount file systems as they are extracted (private) */ boolean_t nomount; /* Was holds flag set in the compound header? */ boolean_t holds; /* skip receive of snapshot holds */ boolean_t skipholds; /* mount the filesystem unless nomount is specified */ boolean_t domount; /* force unmount while recv snapshot (private) */ boolean_t forceunmount; } recvflags_t; extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { ZFS_DIFF_PARSEABLE = 1 << 0, ZFS_DIFF_TIMESTAMP = 1 << 1, ZFS_DIFF_CLASSIFY = 1 << 2, ZFS_DIFF_NO_MANGLE = 1 << 3 } diff_flags_t; extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, int); /* * Miscellaneous functions. */ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_parent_name(zfs_handle_t *, char *, size_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); extern boolean_t zfs_bookmark_exists(const char *path); /* * Mount support functions. 
*/ extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); extern int zfs_mount(zfs_handle_t *, const char *, int); extern int zfs_mount_at(zfs_handle_t *, const char *, int, const char *); extern int zfs_unmount(zfs_handle_t *, const char *, int); extern int zfs_unmountall(zfs_handle_t *, int); #if defined(__linux__) extern int zfs_parse_mount_options(char *mntopts, unsigned long *mntflags, unsigned long *zfsflags, int sloppy, char *badopt, char *mtabopt); extern void zfs_adjust_mount_options(zfs_handle_t *zhp, const char *mntpoint, char *mntopts, char *mtabopt); #endif /* * Share support functions. */ extern boolean_t zfs_is_shared(zfs_handle_t *); extern int zfs_share(zfs_handle_t *); extern int zfs_unshare(zfs_handle_t *); /* * Protocol-specific share support functions. */ extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); extern int zfs_share_nfs(zfs_handle_t *); extern int zfs_share_smb(zfs_handle_t *); extern int zfs_shareall(zfs_handle_t *); extern int zfs_unshare_nfs(zfs_handle_t *, const char *); extern int zfs_unshare_smb(zfs_handle_t *, const char *); extern int zfs_unshareall_nfs(zfs_handle_t *); extern int zfs_unshareall_smb(zfs_handle_t *); extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); extern int zfs_unshareall_bytype(zfs_handle_t *, const char *, const char *); extern int zfs_unshareall(zfs_handle_t *); extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, void *, void *, int, zfs_share_op_t); extern void zfs_commit_nfs_shares(void); extern void zfs_commit_smb_shares(void); extern void zfs_commit_all_shares(void); extern void zfs_commit_shares(const char *); extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); /* * Utility functions to run an external process. 
*/ #define STDOUT_VERBOSE 0x01 #define STDERR_VERBOSE 0x02 #define NO_DEFAULT_PATH 0x04 /* Don't use $PATH to lookup the command */ int libzfs_run_process(const char *, char **, int); int libzfs_run_process_get_stdout(const char *, char *[], char *[], char **[], int *); int libzfs_run_process_get_stdout_nopath(const char *, char *[], char *[], char **[], int *); void libzfs_free_str_array(char **, int); int libzfs_envvar_is_set(char *); /* * Utility functions for zfs version */ extern void zfs_version_userland(char *, int); extern int zfs_version_kernel(char *, int); extern int zfs_version_print(void); /* * Given a device or file, determine if it is part of a pool. */ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, boolean_t *); /* * Label manipulation. */ extern int zpool_clear_label(int); extern int zpool_set_bootenv(zpool_handle_t *, const nvlist_t *); extern int zpool_get_bootenv(zpool_handle_t *, nvlist_t **); /* * Management interfaces for SMB ACL files */ int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); /* * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); /* * Parse a features file for -o compatibility */ typedef enum { ZPOOL_COMPATIBILITY_OK, ZPOOL_COMPATIBILITY_WARNTOKEN, ZPOOL_COMPATIBILITY_BADTOKEN, ZPOOL_COMPATIBILITY_BADFILE, ZPOOL_COMPATIBILITY_NOFILES } zpool_compat_status_t; extern zpool_compat_status_t zpool_load_compat(const char *, boolean_t *, char *, size_t); #ifdef __FreeBSD__ /* * Attach/detach the given filesystem to/from the given jail. 
*/ extern int zfs_jail(zfs_handle_t *zhp, int jailid, int attach); /* * Set loader options for next boot. */ extern int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t, const char *); #endif /* __FreeBSD__ */ #ifdef __cplusplus } #endif #endif /* _LIBZFS_H */ diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 58c2d7635f11..7e8e5ae02220 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -1,7606 +1,7610 @@ + + + + diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 7c4d310782eb..256ae9d1b778 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1,2123 +1,2316 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2020 Joyent, Inc. All rights reserved. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov * Copyright (c) 2017 Datto Inc. * Copyright (c) 2020 The FreeBSD Foundation * * Portions of this software were developed by Allan Jude * under sponsorship from the FreeBSD Foundation. */ /* * Internal utility routines for the ZFS library. 
*/

/*
 * NOTE(review): the header names of the bare #include directives below are
 * missing from this copy of the file -- restore them from the upstream source
 * before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if LIBFETCH_DYNAMIC
#include
#endif
#include
#include
#include
#include
#include
#include
#include

#include "libzfs_impl.h"
#include "zfs_prop.h"
#include "zfeature_common.h"
#include
#include

/*
 * We only care about the scheme in order to match the scheme
 * with the handler. Each handler should validate the full URI
 * as necessary.
 */
#define URI_REGEX "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):"

/*
 * Return the error code most recently recorded on the handle
 * (hdl->libzfs_error).
 */
int
libzfs_errno(libzfs_handle_t *hdl)
{
	return (hdl->libzfs_error);
}

/*
 * Return the action string most recently recorded on the handle
 * (hdl->libzfs_action).  The pointer refers to storage inside the handle.
 */
const char *
libzfs_error_action(libzfs_handle_t *hdl)
{
	return (hdl->libzfs_action);
}

/*
 * Return a human-readable description of the handle's current error.
 *
 * A non-empty auxiliary description (hdl->libzfs_desc, filled in by
 * zfs_error_aux()) takes precedence.  Otherwise the stored error code is
 * mapped to a message looked up through dgettext(TEXT_DOMAIN, ...).  A code
 * with no case below is asserted to be 0 and reported as "no error".
 */
const char *
libzfs_error_description(libzfs_handle_t *hdl)
{
	/* A caller-supplied description overrides the generic table. */
	if (hdl->libzfs_desc[0] != '\0')
		return (hdl->libzfs_desc);

	switch (hdl->libzfs_error) {
	case EZFS_NOMEM:
		return (dgettext(TEXT_DOMAIN, "out of memory"));
	case EZFS_BADPROP:
		return (dgettext(TEXT_DOMAIN, "invalid property value"));
	case EZFS_PROPREADONLY:
		return (dgettext(TEXT_DOMAIN, "read-only property"));
	case EZFS_PROPTYPE:
		return (dgettext(TEXT_DOMAIN, "property doesn't apply to "
		    "datasets of this type"));
	case EZFS_PROPNONINHERIT:
		return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
	case EZFS_PROPSPACE:
		return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
	case EZFS_BADTYPE:
		return (dgettext(TEXT_DOMAIN, "operation not applicable to "
		    "datasets of this type"));
	case EZFS_BUSY:
		return (dgettext(TEXT_DOMAIN, "pool or dataset is busy"));
	case EZFS_EXISTS:
		return (dgettext(TEXT_DOMAIN, "pool or dataset exists"));
	case EZFS_NOENT:
		return (dgettext(TEXT_DOMAIN, "no such pool or dataset"));
	case EZFS_BADSTREAM:
		return (dgettext(TEXT_DOMAIN, "invalid backup stream"));
	case EZFS_DSREADONLY:
		return (dgettext(TEXT_DOMAIN, "dataset is read-only"));
	case EZFS_VOLTOOBIG:
		return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
		    "this system"));
	case EZFS_INVALIDNAME:
		return (dgettext(TEXT_DOMAIN, "invalid name"));
	case EZFS_BADRESTORE:
		return (dgettext(TEXT_DOMAIN, "unable to restore to "
		    "destination"));
	case EZFS_BADBACKUP:
		return (dgettext(TEXT_DOMAIN, "backup failed"));
	case EZFS_BADTARGET:
		return (dgettext(TEXT_DOMAIN, "invalid target vdev"));
	case EZFS_NODEVICE:
		return (dgettext(TEXT_DOMAIN, "no such device in pool"));
	case EZFS_BADDEV:
		return (dgettext(TEXT_DOMAIN, "invalid device"));
	case EZFS_NOREPLICAS:
		return (dgettext(TEXT_DOMAIN, "no valid replicas"));
	case EZFS_RESILVERING:
		return (dgettext(TEXT_DOMAIN, "currently resilvering"));
	case EZFS_BADVERSION:
		return (dgettext(TEXT_DOMAIN, "unsupported version or "
		    "feature"));
	case EZFS_POOLUNAVAIL:
		return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
	case EZFS_DEVOVERFLOW:
		return (dgettext(TEXT_DOMAIN, "too many devices in one vdev"));
	case EZFS_BADPATH:
		return (dgettext(TEXT_DOMAIN, "must be an absolute path"));
	case EZFS_CROSSTARGET:
		return (dgettext(TEXT_DOMAIN, "operation crosses datasets or "
		    "pools"));
	case EZFS_ZONED:
		return (dgettext(TEXT_DOMAIN, "dataset in use by local zone"));
	case EZFS_MOUNTFAILED:
		return (dgettext(TEXT_DOMAIN, "mount failed"));
	case EZFS_UMOUNTFAILED:
		return (dgettext(TEXT_DOMAIN, "unmount failed"));
	case EZFS_UNSHARENFSFAILED:
		return (dgettext(TEXT_DOMAIN, "NFS share removal failed"));
	case EZFS_SHARENFSFAILED:
		return (dgettext(TEXT_DOMAIN, "NFS share creation failed"));
	case EZFS_UNSHARESMBFAILED:
		return (dgettext(TEXT_DOMAIN, "SMB share removal failed"));
	case EZFS_SHARESMBFAILED:
		return (dgettext(TEXT_DOMAIN, "SMB share creation failed"));
	case EZFS_PERM:
		return (dgettext(TEXT_DOMAIN, "permission denied"));
	case EZFS_NOSPC:
		return (dgettext(TEXT_DOMAIN, "out of space"));
	case EZFS_FAULT:
		return (dgettext(TEXT_DOMAIN, "bad address"));
	case EZFS_IO:
		return (dgettext(TEXT_DOMAIN, "I/O error"));
	case EZFS_INTR:
		return (dgettext(TEXT_DOMAIN, "signal received"));
	case EZFS_CKSUM:
		return (dgettext(TEXT_DOMAIN, "insufficient replicas"));
	case EZFS_ISSPARE:
		return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
		    "spare"));
	case EZFS_INVALCONFIG:
		return (dgettext(TEXT_DOMAIN, "invalid vdev configuration"));
	case EZFS_RECURSIVE:
		return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
	case EZFS_NOHISTORY:
		return (dgettext(TEXT_DOMAIN, "no history available"));
	case EZFS_POOLPROPS:
		return (dgettext(TEXT_DOMAIN, "failed to retrieve "
		    "pool properties"));
	case EZFS_POOL_NOTSUP:
		return (dgettext(TEXT_DOMAIN, "operation not supported "
		    "on this type of pool"));
	case EZFS_POOL_INVALARG:
		return (dgettext(TEXT_DOMAIN, "invalid argument for "
		    "this pool operation"));
	case EZFS_NAMETOOLONG:
		return (dgettext(TEXT_DOMAIN, "dataset name is too long"));
	case EZFS_OPENFAILED:
		return (dgettext(TEXT_DOMAIN, "open failed"));
	case EZFS_NOCAP:
		return (dgettext(TEXT_DOMAIN,
		    "disk capacity information could not be retrieved"));
	case EZFS_LABELFAILED:
		return (dgettext(TEXT_DOMAIN, "write of label failed"));
	case EZFS_BADWHO:
		return (dgettext(TEXT_DOMAIN, "invalid user/group"));
	case EZFS_BADPERM:
		return (dgettext(TEXT_DOMAIN, "invalid permission"));
	case EZFS_BADPERMSET:
		return (dgettext(TEXT_DOMAIN, "invalid permission set name"));
	case EZFS_NODELEGATION:
		return (dgettext(TEXT_DOMAIN, "delegated administration is "
		    "disabled on pool"));
	case EZFS_BADCACHE:
		return (dgettext(TEXT_DOMAIN, "invalid or missing cache file"));
	case EZFS_ISL2CACHE:
		return (dgettext(TEXT_DOMAIN, "device is in use as a cache"));
	case EZFS_VDEVNOTSUP:
		return (dgettext(TEXT_DOMAIN, "vdev specification is not "
		    "supported"));
	case EZFS_NOTSUP:
		return (dgettext(TEXT_DOMAIN, "operation not supported "
		    "on this dataset"));
	case EZFS_IOC_NOTSUPPORTED:
		return (dgettext(TEXT_DOMAIN, "operation not supported by "
		    "zfs kernel module"));
	case EZFS_ACTIVE_SPARE:
		return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
		    "device"));
	case EZFS_UNPLAYED_LOGS:
		return (dgettext(TEXT_DOMAIN, "log device has unplayed intent "
		    "logs"));
	case EZFS_REFTAG_RELE:
		return (dgettext(TEXT_DOMAIN, "no such tag on this dataset"));
	case EZFS_REFTAG_HOLD:
		return (dgettext(TEXT_DOMAIN, "tag already exists on this "
		    "dataset"));
	case EZFS_TAGTOOLONG:
		return (dgettext(TEXT_DOMAIN, "tag too long"));
	case EZFS_PIPEFAILED:
		return (dgettext(TEXT_DOMAIN, "pipe create failed"));
	case EZFS_THREADCREATEFAILED:
		return (dgettext(TEXT_DOMAIN, "thread create failed"));
	case EZFS_POSTSPLIT_ONLINE:
		return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
		    "into a new one"));
	case EZFS_SCRUB_PAUSED:
		return (dgettext(TEXT_DOMAIN, "scrub is paused; "
		    "use 'zpool scrub' to resume"));
	case EZFS_SCRUBBING:
		return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
		    "use 'zpool scrub -s' to cancel current scrub"));
	case EZFS_NO_SCRUB:
		return (dgettext(TEXT_DOMAIN, "there is no active scrub"));
	case EZFS_DIFF:
		return (dgettext(TEXT_DOMAIN, "unable to generate diffs"));
	case EZFS_DIFFDATA:
		return (dgettext(TEXT_DOMAIN, "invalid diff data"));
	case EZFS_POOLREADONLY:
		return (dgettext(TEXT_DOMAIN, "pool is read-only"));
	case EZFS_NO_PENDING:
		return (dgettext(TEXT_DOMAIN, "operation is not "
		    "in progress"));
	case EZFS_CHECKPOINT_EXISTS:
		return (dgettext(TEXT_DOMAIN, "checkpoint exists"));
	case EZFS_DISCARDING_CHECKPOINT:
		return (dgettext(TEXT_DOMAIN, "currently discarding "
		    "checkpoint"));
	case EZFS_NO_CHECKPOINT:
		return (dgettext(TEXT_DOMAIN, "checkpoint does not exist"));
	case EZFS_DEVRM_IN_PROGRESS:
		return (dgettext(TEXT_DOMAIN, "device removal in progress"));
	case EZFS_VDEV_TOO_BIG:
		return (dgettext(TEXT_DOMAIN, "device exceeds supported size"));
	case EZFS_ACTIVE_POOL:
		return (dgettext(TEXT_DOMAIN, "pool is imported on a "
		    "different host"));
	case EZFS_CRYPTOFAILED:
		return (dgettext(TEXT_DOMAIN, "encryption failure"));
	case EZFS_TOOMANY:
		return (dgettext(TEXT_DOMAIN, "argument list too long"));
	case EZFS_INITIALIZING:
		return (dgettext(TEXT_DOMAIN, "currently initializing"));
	case EZFS_NO_INITIALIZE:
		return (dgettext(TEXT_DOMAIN, "there is no active "
		    "initialization"));
	case EZFS_WRONG_PARENT:
		return (dgettext(TEXT_DOMAIN, "invalid parent dataset"));
	case EZFS_TRIMMING:
		return (dgettext(TEXT_DOMAIN, "currently trimming"));
	case EZFS_NO_TRIM:
		return (dgettext(TEXT_DOMAIN, "there is no active trim"));
	case EZFS_TRIM_NOTSUP:
		return (dgettext(TEXT_DOMAIN, "trim operations are not "
		    "supported by this device"));
	case EZFS_NO_RESILVER_DEFER:
		return (dgettext(TEXT_DOMAIN, "this action requires the "
		    "resilver_defer feature"));
	case EZFS_EXPORT_IN_PROGRESS:
		return (dgettext(TEXT_DOMAIN, "pool export in progress"));
	case EZFS_REBUILDING:
		return (dgettext(TEXT_DOMAIN, "currently sequentially "
		    "resilvering"));
	case EZFS_UNKNOWN:
		return (dgettext(TEXT_DOMAIN, "unknown error"));
	default:
		/* Any code without a case above is expected to be 0. */
		assert(hdl->libzfs_error == 0);
		return (dgettext(TEXT_DOMAIN, "no error"));
	}
}

/*
 * Queue an auxiliary, printf-style description for the next error recorded
 * on the handle.  The text is formatted into hdl->libzfs_desc (truncated to
 * that buffer's size) and libzfs_desc_active is set so that the following
 * zfs_verror() call keeps it instead of clearing it.
 */
/*PRINTFLIKE2*/
void
zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);

	(void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc),
	    fmt, ap);
	hdl->libzfs_desc_active = 1;

	va_end(ap);
}

/*
 * Record an error on the handle.
 *
 * The action string is formatted from fmt/ap into hdl->libzfs_action and the
 * code is stored in hdl->libzfs_error.  A description queued by
 * zfs_error_aux() is kept (and its one-shot flag cleared); otherwise the
 * description is reset to the empty string.
 *
 * When hdl->libzfs_printerr is set, the action and description are written
 * to stderr.  In that mode EZFS_UNKNOWN calls abort() and EZFS_NOMEM calls
 * exit(1).
 */
static void
zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap)
{
	(void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action),
	    fmt, ap);
	hdl->libzfs_error = error;

	/* Consume the one-shot description, or drop a stale one. */
	if (hdl->libzfs_desc_active)
		hdl->libzfs_desc_active = 0;
	else
		hdl->libzfs_desc[0] = '\0';

	if (hdl->libzfs_printerr) {
		if (error == EZFS_UNKNOWN) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal "
			    "error: %s: %s\n"), hdl->libzfs_action,
			    libzfs_error_description(hdl));
			abort();
		}

		(void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action,
		    libzfs_error_description(hdl));
		if (error == EZFS_NOMEM)
			exit(1);
	}
}

/*
 * Record `error` on the handle with `msg` used verbatim as the action
 * string.  Returns whatever zfs_error_fmt() returns.
 */
int
zfs_error(libzfs_handle_t *hdl, int error, const char *msg)
{
	return (zfs_error_fmt(hdl, error, "%s", msg));
}

/*
 * Printf-style variant of zfs_error(): the action string is built from fmt
 * and the variable arguments.
 */
/*PRINTFLIKE3*/
int
zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);

	zfs_verror(hdl, error, fmt, ap);

	va_end(ap);

	/* Always -1, so callers can write "return (zfs_error_fmt(...))". */
	return (-1);
}

/*
 * Handle the errno values that are translated the same way for datasets and
 * pools.  Returns -1 after recording the matching EZFS_* code when `error`
 * is one of them, or 0 (recording nothing and leaving `ap` unused) when the
 * caller has to translate the value itself.
 */
static int
zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
    va_list ap)
{
	switch (error) {
	case EPERM:
	case EACCES:
		zfs_verror(hdl, EZFS_PERM, fmt, ap);
		return (-1);

	case ECANCELED:
		zfs_verror(hdl, EZFS_NODELEGATION, fmt, ap);
		return (-1);

	case EIO:
		zfs_verror(hdl, EZFS_IO, fmt, ap);
		return (-1);

	case EFAULT:
		zfs_verror(hdl, EZFS_FAULT, fmt, ap);
		return (-1);

	case EINTR:
		zfs_verror(hdl, EZFS_INTR, fmt, ap);
		return (-1);

	case ECKSUM:
		zfs_verror(hdl, EZFS_CKSUM, fmt, ap);
		return (-1);
	}

	return (0);
}

/*
 * Translate an errno-style value from a dataset operation into a libzfs
 * error, using `msg` verbatim as the action string.
 */
int
zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
{
	return (zfs_standard_error_fmt(hdl, error, "%s", msg));
}

/*
 * Printf-style variant of zfs_standard_error().
 *
 * The shared translations in zfs_common_error() are tried first; the switch
 * below covers the remaining values with dataset-oriented wording.  A value
 * with no case is recorded as EZFS_UNKNOWN with strerror(error) as its
 * description.  Always returns -1.
 */
/*PRINTFLIKE3*/
int
zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);

	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
		va_end(ap);
		return (-1);
	}

	switch (error) {
	case ENXIO:
	case ENODEV:
	case EPIPE:
		zfs_verror(hdl, EZFS_IO, fmt, ap);
		break;

	case ENOENT:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "dataset does not exist"));
		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
		break;

	case ENOSPC:
	case EDQUOT:
		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
		break;

	case EEXIST:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "dataset already exists"));
		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
		break;

	case EBUSY:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "dataset is busy"));
		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
		break;
	case EROFS:
		zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap);
		break;
	case ENAMETOOLONG:
		zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap);
		break;
	case ENOTSUP:
		zfs_verror(hdl, EZFS_BADVERSION, fmt, ap);
		break;
	case EAGAIN:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "pool I/O is currently suspended"));
		zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
		break;
	case EREMOTEIO:
		zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap);
		break;
	case ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE:
	case ZFS_ERR_IOC_CMD_UNAVAIL:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
		    "module does not support this operation. A reboot may "
		    "be required to enable this operation."));
		zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap);
		break;
	case ZFS_ERR_IOC_ARG_UNAVAIL:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
		    "module does not support an option for this operation. "
		    "A reboot may be required to enable this option."));
		zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap);
		break;
	case ZFS_ERR_IOC_ARG_REQUIRED:
	case ZFS_ERR_IOC_ARG_BADTYPE:
		zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap);
		break;
	case ZFS_ERR_WRONG_PARENT:
		zfs_verror(hdl, EZFS_WRONG_PARENT, fmt, ap);
		break;
	case ZFS_ERR_BADPROP:
		zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
		break;
	default:
		/* Unrecognised value: keep the system's text as the detail. */
		zfs_error_aux(hdl, "%s", strerror(error));
		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
		break;
	}

	va_end(ap);
	return (-1);
}

/*
 * Record the libzfs error for a failed attempt to set property `prop`.
 * `err` is the errno-style failure and `errbuf` is used as the action
 * string.  Combinations of err and prop with a more specific meaning are
 * handled here; everything else is passed to zfs_standard_error().
 */
void
zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
    char *errbuf)
{
	switch (err) {

	case ENOSPC:
		/*
		 * For quotas and reservations, ENOSPC indicates
		 * something different; setting a quota or reservation
		 * doesn't use any disk space.
		 */
		switch (prop) {
		case ZFS_PROP_QUOTA:
		case ZFS_PROP_REFQUOTA:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "size is less than current used or "
			    "reserved space"));
			(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
			break;

		case ZFS_PROP_RESERVATION:
		case ZFS_PROP_REFRESERVATION:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "size is greater than available space"));
			(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
			break;

		default:
			(void) zfs_standard_error(hdl, err, errbuf);
			break;
		}
		break;

	case EBUSY:
		(void) zfs_standard_error(hdl, EBUSY, errbuf);
		break;

	case EROFS:
		(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
		break;

	case E2BIG:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "property value too long"));
		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
		break;

	case ENOTSUP:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "pool and or dataset must be upgraded to set this "
		    "property or value"));
		(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
		break;

	case ERANGE:
		if (prop == ZFS_PROP_COMPRESSION ||
		    prop == ZFS_PROP_DNODESIZE ||
		    prop == ZFS_PROP_RECORDSIZE) {
			(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "property setting is not allowed on "
			    "bootable datasets"));
			(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
		} else if (prop == ZFS_PROP_CHECKSUM ||
		    prop == ZFS_PROP_DEDUP) {
			(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "property setting is not allowed on "
			    "root pools"));
			(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
		} else {
			(void) zfs_standard_error(hdl, err, errbuf);
		}
		break;

	case EINVAL:
		if (prop == ZPROP_INVAL) {
			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
		} else {
			(void) zfs_standard_error(hdl, err, errbuf);
		}
		break;

	case ZFS_ERR_BADPROP:
		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
		break;

	case EACCES:
		if (prop == ZFS_PROP_KEYLOCATION) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "keylocation may only be set on encryption roots"));
			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
		} else {
			(void) zfs_standard_error(hdl, err, errbuf);
		}
		break;

	case EOVERFLOW:
		/*
		 * This platform can't address a volume this big.
		 * (Only reported specially when _ILP32 is defined; otherwise
		 * control falls through to the default handling.)
		 */
#ifdef _ILP32
		if (prop == ZFS_PROP_VOLSIZE) {
			(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
			break;
		}
#endif
		fallthrough;
	default:
		(void) zfs_standard_error(hdl, err, errbuf);
	}
}

/*
 * Translate an errno-style value from a pool operation into a libzfs error,
 * using `msg` verbatim as the action string.
 */
int
zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
{
	return (zpool_standard_error_fmt(hdl, error, "%s", msg));
}

/*
 * Printf-style variant of zpool_standard_error().
 *
 * The shared translations in zfs_common_error() are tried first; the switch
 * below covers the remaining values with pool-oriented wording.  A value
 * with no case is recorded as EZFS_UNKNOWN with strerror(error) as its
 * description.  Always returns -1.
 */
/*PRINTFLIKE3*/
int
zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);

	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
		va_end(ap);
		return (-1);
	}

	switch (error) {
	case ENODEV:
		zfs_verror(hdl, EZFS_NODEVICE, fmt, ap);
		break;

	case ENOENT:
		zfs_error_aux(hdl,
		    dgettext(TEXT_DOMAIN, "no such pool or dataset"));
		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
		break;

	case EEXIST:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "pool already exists"));
		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
		break;

	case EBUSY:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy"));
		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
		break;

	/* There is no pending operation to cancel */
	case ENOTACTIVE:
		zfs_verror(hdl, EZFS_NO_PENDING, fmt, ap);
		break;

	case ENXIO:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "one or more devices is currently unavailable"));
		zfs_verror(hdl, EZFS_BADDEV, fmt, ap);
		break;

	case ENAMETOOLONG:
		zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap);
		break;

	case ENOTSUP:
		zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap);
		break;

	case EINVAL:
		zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap);
		break;

	case ENOSPC:
	case EDQUOT:
		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
		break;

	case EAGAIN:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "pool I/O is currently suspended"));
		zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
		break;

	case EROFS:
		zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap);
		break;
	case EDOM:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "block size out of range or does not match"));
		zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
		break;
	case EREMOTEIO:
		zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap);
		break;
	case ZFS_ERR_CHECKPOINT_EXISTS:
		zfs_verror(hdl, EZFS_CHECKPOINT_EXISTS, fmt, ap);
		break;
	case ZFS_ERR_DISCARDING_CHECKPOINT:
		zfs_verror(hdl, EZFS_DISCARDING_CHECKPOINT, fmt, ap);
		break;
	case ZFS_ERR_NO_CHECKPOINT:
		zfs_verror(hdl, EZFS_NO_CHECKPOINT, fmt, ap);
		break;
	case ZFS_ERR_DEVRM_IN_PROGRESS:
		zfs_verror(hdl, EZFS_DEVRM_IN_PROGRESS, fmt, ap);
		break;
	case ZFS_ERR_VDEV_TOO_BIG:
		zfs_verror(hdl, EZFS_VDEV_TOO_BIG, fmt, ap);
		break;
	case ZFS_ERR_EXPORT_IN_PROGRESS:
		zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap);
		break;
	case ZFS_ERR_RESILVER_IN_PROGRESS:
		zfs_verror(hdl, EZFS_RESILVERING, fmt, ap);
		break;
	case ZFS_ERR_REBUILD_IN_PROGRESS:
		zfs_verror(hdl, EZFS_REBUILDING, fmt, ap);
		break;
	case ZFS_ERR_BADPROP:
		zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
		break;
	case ZFS_ERR_IOC_CMD_UNAVAIL:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
		    "module does not support this operation. A reboot may "
		    "be required to enable this operation."));
		zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap);
		break;
	case ZFS_ERR_IOC_ARG_UNAVAIL:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
		    "module does not support an option for this operation. "
		    "A reboot may be required to enable this option."));
		zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap);
		break;
	case ZFS_ERR_IOC_ARG_REQUIRED:
	case ZFS_ERR_IOC_ARG_BADTYPE:
		zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap);
		break;
	default:
		/* Unrecognised value: keep the system's text as the detail. */
		zfs_error_aux(hdl, "%s", strerror(error));
		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
	}

	va_end(ap);
	return (-1);
}

/*
 * Record an out of memory error (EZFS_NOMEM, action "internal error") on the
 * handle.  The message is printed and the process exits only when error
 * printing is enabled on the handle (see zfs_verror()); otherwise this
 * returns zfs_error()'s result to the caller.
 */
int
no_memory(libzfs_handle_t *hdl)
{
	return (zfs_error(hdl, EZFS_NOMEM, "internal error"));
}

/*
 * Allocate `size` bytes of zero-filled memory via calloc().  On failure
 * no_memory() is invoked; if that call returns, NULL is returned to the
 * caller.
 */
void *
zfs_alloc(libzfs_handle_t *hdl, size_t size)
{
	void *data;

	if ((data = calloc(1, size)) == NULL)
		(void) no_memory(hdl);

	return (data);
}

/*
 * A safe form of asprintf() which will die if the allocation fails.
 */
/*PRINTFLIKE2*/
char *
zfs_asprintf(libzfs_handle_t *hdl, const char *fmt, ...)
{ va_list ap; char *ret; int err; va_start(ap, fmt); err = vasprintf(&ret, fmt, ap); va_end(ap); if (err < 0) { (void) no_memory(hdl); ret = NULL; } return (ret); } /* * A safe form of realloc(), which also zeroes newly allocated space. */ void * zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize) { void *ret; if ((ret = realloc(ptr, newsize)) == NULL) { (void) no_memory(hdl); return (NULL); } bzero((char *)ret + oldsize, (newsize - oldsize)); return (ret); } /* * A safe form of strdup() which will die if the allocation fails. */ char * zfs_strdup(libzfs_handle_t *hdl, const char *str) { char *ret; if ((ret = strdup(str)) == NULL) (void) no_memory(hdl); return (ret); } void libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr) { hdl->libzfs_printerr = printerr; } /* * Read lines from an open file descriptor and store them in an array of * strings until EOF. lines[] will be allocated and populated with all the * lines read. All newlines are replaced with NULL terminators for * convenience. lines[] must be freed after use with libzfs_free_str_array(). * * Returns the number of lines read. 
*/ static int libzfs_read_stdout_from_fd(int fd, char **lines[]) { FILE *fp; int lines_cnt = 0; size_t len = 0; char *line = NULL; char **tmp_lines = NULL, **tmp; char *nl = NULL; int rc; fp = fdopen(fd, "r"); if (fp == NULL) return (0); while (1) { rc = getline(&line, &len, fp); if (rc == -1) break; tmp = realloc(tmp_lines, sizeof (*tmp_lines) * (lines_cnt + 1)); if (tmp == NULL) { /* Return the lines we were able to process */ break; } tmp_lines = tmp; /* Terminate newlines */ if ((nl = strchr(line, '\n')) != NULL) *nl = '\0'; tmp_lines[lines_cnt] = line; lines_cnt++; line = NULL; } fclose(fp); *lines = tmp_lines; return (lines_cnt); } static int libzfs_run_process_impl(const char *path, char *argv[], char *env[], int flags, char **lines[], int *lines_cnt) { pid_t pid; int error, devnull_fd; int link[2]; /* * Setup a pipe between our child and parent process if we're * reading stdout. */ if ((lines != NULL) && pipe2(link, O_CLOEXEC) == -1) return (-EPIPE); pid = vfork(); if (pid == 0) { /* Child process */ devnull_fd = open("/dev/null", O_WRONLY | O_CLOEXEC); if (devnull_fd < 0) _exit(-1); if (!(flags & STDOUT_VERBOSE) && (lines == NULL)) (void) dup2(devnull_fd, STDOUT_FILENO); else if (lines != NULL) { /* Save the output to lines[] */ dup2(link[1], STDOUT_FILENO); } if (!(flags & STDERR_VERBOSE)) (void) dup2(devnull_fd, STDERR_FILENO); if (flags & NO_DEFAULT_PATH) { if (env == NULL) execv(path, argv); else execve(path, argv, env); } else { if (env == NULL) execvp(path, argv); else execvpe(path, argv, env); } _exit(-1); } else if (pid > 0) { /* Parent process */ int status; while ((error = waitpid(pid, &status, 0)) == -1 && errno == EINTR) { } if (error < 0 || !WIFEXITED(status)) return (-1); if (lines != NULL) { close(link[1]); *lines_cnt = libzfs_read_stdout_from_fd(link[0], lines); } return (WEXITSTATUS(status)); } return (-1); } int libzfs_run_process(const char *path, char *argv[], int flags) { return (libzfs_run_process_impl(path, argv, NULL, flags, NULL, 
NULL)); } /* * Run a command and store its stdout lines in an array of strings (lines[]). * lines[] is allocated and populated for you, and the number of lines is set in * lines_cnt. lines[] must be freed after use with libzfs_free_str_array(). * All newlines (\n) in lines[] are terminated for convenience. */ int libzfs_run_process_get_stdout(const char *path, char *argv[], char *env[], char **lines[], int *lines_cnt) { return (libzfs_run_process_impl(path, argv, env, 0, lines, lines_cnt)); } /* * Same as libzfs_run_process_get_stdout(), but run without $PATH set. This * means that *path needs to be the full path to the executable. */ int libzfs_run_process_get_stdout_nopath(const char *path, char *argv[], char *env[], char **lines[], int *lines_cnt) { return (libzfs_run_process_impl(path, argv, env, NO_DEFAULT_PATH, lines, lines_cnt)); } /* * Free an array of strings. Free both the strings contained in the array and * the array itself. */ void libzfs_free_str_array(char **strs, int count) { while (--count >= 0) free(strs[count]); free(strs); } /* * Returns 1 if environment variable is set to "YES", "yes", "ON", "on", or * a non-zero number. * * Returns 0 otherwise. 
*/ int libzfs_envvar_is_set(char *envvar) { char *env = getenv(envvar); if (env && (strtoul(env, NULL, 0) > 0 || (!strncasecmp(env, "YES", 3) && strnlen(env, 4) == 3) || (!strncasecmp(env, "ON", 2) && strnlen(env, 3) == 2))) return (1); return (0); } libzfs_handle_t * libzfs_init(void) { libzfs_handle_t *hdl; int error; char *env; if ((error = libzfs_load_module()) != 0) { errno = error; return (NULL); } if ((hdl = calloc(1, sizeof (libzfs_handle_t))) == NULL) { return (NULL); } if (regcomp(&hdl->libzfs_urire, URI_REGEX, 0) != 0) { free(hdl); return (NULL); } if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR|O_EXCL|O_CLOEXEC)) < 0) { free(hdl); return (NULL); } #ifdef HAVE_SETMNTENT if ((hdl->libzfs_mnttab = setmntent(MNTTAB, "re")) == NULL) { #else if ((hdl->libzfs_mnttab = fopen(MNTTAB, "re")) == NULL) { #endif (void) close(hdl->libzfs_fd); free(hdl); return (NULL); } if (libzfs_core_init() != 0) { (void) close(hdl->libzfs_fd); (void) fclose(hdl->libzfs_mnttab); free(hdl); return (NULL); } zfs_prop_init(); zpool_prop_init(); zpool_feature_init(); libzfs_mnttab_init(hdl); fletcher_4_init(); if (getenv("ZFS_PROP_DEBUG") != NULL) { hdl->libzfs_prop_debug = B_TRUE; } if ((env = getenv("ZFS_SENDRECV_MAX_NVLIST")) != NULL) { if ((error = zfs_nicestrtonum(hdl, env, &hdl->libzfs_max_nvlist))) { errno = error; (void) close(hdl->libzfs_fd); (void) fclose(hdl->libzfs_mnttab); free(hdl); return (NULL); } } else { hdl->libzfs_max_nvlist = (SPA_MAXBLOCKSIZE * 4); } /* * For testing, remove some settable properties and features */ if (libzfs_envvar_is_set("ZFS_SYSFS_PROP_SUPPORT_TEST")) { zprop_desc_t *proptbl; proptbl = zpool_prop_get_table(); proptbl[ZPOOL_PROP_COMMENT].pd_zfs_mod_supported = B_FALSE; proptbl = zfs_prop_get_table(); proptbl[ZFS_PROP_DNODESIZE].pd_zfs_mod_supported = B_FALSE; zfeature_info_t *ftbl = spa_feature_table; ftbl[SPA_FEATURE_LARGE_BLOCKS].fi_zfs_mod_supported = B_FALSE; } return (hdl); } void libzfs_fini(libzfs_handle_t *hdl) { (void) close(hdl->libzfs_fd); 
if (hdl->libzfs_mnttab) #ifdef HAVE_SETMNTENT (void) endmntent(hdl->libzfs_mnttab); #else (void) fclose(hdl->libzfs_mnttab); #endif zpool_free_handles(hdl); namespace_clear(hdl); libzfs_mnttab_fini(hdl); libzfs_core_fini(); regfree(&hdl->libzfs_urire); fletcher_4_fini(); #if LIBFETCH_DYNAMIC if (hdl->libfetch != (void *)-1 && hdl->libfetch != NULL) (void) dlclose(hdl->libfetch); free(hdl->libfetch_load_error); #endif free(hdl); } libzfs_handle_t * zpool_get_handle(zpool_handle_t *zhp) { return (zhp->zpool_hdl); } libzfs_handle_t * zfs_get_handle(zfs_handle_t *zhp) { return (zhp->zfs_hdl); } zpool_handle_t * zfs_get_pool_handle(const zfs_handle_t *zhp) { return (zhp->zpool_hdl); } /* * Given a name, determine whether or not it's a valid path * (starts with '/' or "./"). If so, walk the mnttab trying * to match the device number. If not, treat the path as an * fs/vol/snap/bkmark name. */ zfs_handle_t * zfs_path_to_zhandle(libzfs_handle_t *hdl, const char *path, zfs_type_t argtype) { struct stat64 statbuf; struct extmnttab entry; if (path[0] != '/' && strncmp(path, "./", strlen("./")) != 0) { /* * It's not a valid path, assume it's a name of type 'argtype'. */ return (zfs_open(hdl, path, argtype)); } /* Reopen MNTTAB to prevent reading stale data from open file */ if (freopen(MNTTAB, "re", hdl->libzfs_mnttab) == NULL) return (NULL); if (getextmntent(path, &entry, &statbuf) != 0) return (NULL); if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { (void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"), path); return (NULL); } return (zfs_open(hdl, entry.mnt_special, ZFS_TYPE_FILESYSTEM)); } /* * Initialize the zc_nvlist_dst member to prepare for receiving an nvlist from * an ioctl(). 
*/ int zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len) { if (len == 0) len = 256 * 1024; zc->zc_nvlist_dst_size = len; zc->zc_nvlist_dst = (uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size); if (zc->zc_nvlist_dst == 0) return (-1); return (0); } /* * Called when an ioctl() which returns an nvlist fails with ENOMEM. This will * expand the nvlist to the size specified in 'zc_nvlist_dst_size', which was * filled in by the kernel to indicate the actual required size. */ int zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc) { free((void *)(uintptr_t)zc->zc_nvlist_dst); zc->zc_nvlist_dst = (uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size); if (zc->zc_nvlist_dst == 0) return (-1); return (0); } /* * Called to free the src and dst nvlists stored in the command structure. */ void zcmd_free_nvlists(zfs_cmd_t *zc) { free((void *)(uintptr_t)zc->zc_nvlist_conf); free((void *)(uintptr_t)zc->zc_nvlist_src); free((void *)(uintptr_t)zc->zc_nvlist_dst); zc->zc_nvlist_conf = 0; zc->zc_nvlist_src = 0; zc->zc_nvlist_dst = 0; } static int zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen, nvlist_t *nvl) { char *packed; size_t len; verify(nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0); if ((packed = zfs_alloc(hdl, len)) == NULL) return (-1); verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0); *outnv = (uint64_t)(uintptr_t)packed; *outlen = len; return (0); } int zcmd_write_conf_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl) { return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_conf, &zc->zc_nvlist_conf_size, nvl)); } int zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl) { return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_src, &zc->zc_nvlist_src_size, nvl)); } /* * Unpacks an nvlist from the ZFS ioctl command structure. 
*/ int zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp) { if (nvlist_unpack((void *)(uintptr_t)zc->zc_nvlist_dst, zc->zc_nvlist_dst_size, nvlp, 0) != 0) return (no_memory(hdl)); return (0); } /* * ================================================================ * API shared by zfs and zpool property management * ================================================================ */ static void zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) { zprop_list_t *pl = cbp->cb_proplist; int i; char *title; size_t len; cbp->cb_first = B_FALSE; if (cbp->cb_scripted) return; /* * Start with the length of the column headers. */ cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME")); cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN, "PROPERTY")); cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN, "VALUE")); cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN, "RECEIVED")); cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN, "SOURCE")); /* first property is always NAME */ assert(cbp->cb_proplist->pl_prop == ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME)); /* * Go through and calculate the widths for each column. For the * 'source' column, we kludge it up by taking the worst-case scenario of * inheriting from the longest name. This is acceptable because in the * majority of cases 'SOURCE' is the last column displayed, and we don't * use the width anyway. Note that the 'VALUE' column can be oversized, * if the name of the property is much longer than any values we find. */ for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { /* * 'PROPERTY' column */ if (pl->pl_prop != ZPROP_INVAL) { const char *propname = (type == ZFS_TYPE_POOL) ? 
zpool_prop_to_name(pl->pl_prop) : zfs_prop_to_name(pl->pl_prop); len = strlen(propname); if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) cbp->cb_colwidths[GET_COL_PROPERTY] = len; } else { len = strlen(pl->pl_user_prop); if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) cbp->cb_colwidths[GET_COL_PROPERTY] = len; } /* * 'VALUE' column. The first property is always the 'name' * property that was tacked on either by /sbin/zfs's * zfs_do_get() or when calling zprop_expand_list(), so we * ignore its width. If the user specified the name property * to display, then it will be later in the list in any case. */ if (pl != cbp->cb_proplist && pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE]) cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width; /* 'RECEIVED' column. */ if (pl != cbp->cb_proplist && pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD]) cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width; /* * 'NAME' and 'SOURCE' columns */ if (pl->pl_prop == (type == ZFS_TYPE_POOL ? ZPOOL_PROP_NAME : ZFS_PROP_NAME) && pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) { cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width; cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width + strlen(dgettext(TEXT_DOMAIN, "inherited from")); } } /* * Now go through and print the headers. 
*/ for (i = 0; i < ZFS_GET_NCOLS; i++) { switch (cbp->cb_columns[i]) { case GET_COL_NAME: title = dgettext(TEXT_DOMAIN, "NAME"); break; case GET_COL_PROPERTY: title = dgettext(TEXT_DOMAIN, "PROPERTY"); break; case GET_COL_VALUE: title = dgettext(TEXT_DOMAIN, "VALUE"); break; case GET_COL_RECVD: title = dgettext(TEXT_DOMAIN, "RECEIVED"); break; case GET_COL_SOURCE: title = dgettext(TEXT_DOMAIN, "SOURCE"); break; default: title = NULL; } if (title != NULL) { if (i == (ZFS_GET_NCOLS - 1) || cbp->cb_columns[i + 1] == GET_COL_NONE) (void) printf("%s", title); else (void) printf("%-*s ", cbp->cb_colwidths[cbp->cb_columns[i]], title); } } (void) printf("\n"); } /* * Display a single line of output, according to the settings in the callback * structure. */ void zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, const char *propname, const char *value, zprop_source_t sourcetype, const char *source, const char *recvd_value) { int i; const char *str = NULL; char buf[128]; /* * Ignore those source types that the user has chosen to ignore. */ if ((sourcetype & cbp->cb_sources) == 0) return; if (cbp->cb_first) zprop_print_headers(cbp, cbp->cb_type); for (i = 0; i < ZFS_GET_NCOLS; i++) { switch (cbp->cb_columns[i]) { case GET_COL_NAME: str = name; break; case GET_COL_PROPERTY: str = propname; break; case GET_COL_VALUE: str = value; break; case GET_COL_SOURCE: switch (sourcetype) { case ZPROP_SRC_NONE: str = "-"; break; case ZPROP_SRC_DEFAULT: str = "default"; break; case ZPROP_SRC_LOCAL: str = "local"; break; case ZPROP_SRC_TEMPORARY: str = "temporary"; break; case ZPROP_SRC_INHERITED: (void) snprintf(buf, sizeof (buf), "inherited from %s", source); str = buf; break; case ZPROP_SRC_RECEIVED: str = "received"; break; default: str = NULL; assert(!"unhandled zprop_source_t"); } break; case GET_COL_RECVD: str = (recvd_value == NULL ? 
"-" : recvd_value); break; default: continue; } if (i == (ZFS_GET_NCOLS - 1) || cbp->cb_columns[i + 1] == GET_COL_NONE) (void) printf("%s", str); else if (cbp->cb_scripted) (void) printf("%s\t", str); else (void) printf("%-*s ", cbp->cb_colwidths[cbp->cb_columns[i]], str); } (void) printf("\n"); } /* * Given a numeric suffix, convert the value into a number of bits that the * resulting value must be shifted. */ static int str2shift(libzfs_handle_t *hdl, const char *buf) { const char *ends = "BKMGTPEZ"; int i; if (buf[0] == '\0') return (0); for (i = 0; i < strlen(ends); i++) { if (toupper(buf[0]) == ends[i]) break; } if (i == strlen(ends)) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid numeric suffix '%s'"), buf); return (-1); } /* * Allow 'G' = 'GB' = 'GiB', case-insensitively. * However, 'BB' and 'BiB' are disallowed. */ if (buf[1] == '\0' || (toupper(buf[0]) != 'B' && ((toupper(buf[1]) == 'B' && buf[2] == '\0') || (toupper(buf[1]) == 'I' && toupper(buf[2]) == 'B' && buf[3] == '\0')))) return (10 * i); if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid numeric suffix '%s'"), buf); return (-1); } /* * Convert a string of the form '100G' into a real number. Used when setting * properties or creating a volume. 'buf' is used to place an extended error * message for the caller to use. */ int zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num) { char *end; int shift; *num = 0; /* Check to see if this looks like a number. */ if ((value[0] < '0' || value[0] > '9') && value[0] != '.') { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "bad numeric value '%s'"), value); return (-1); } /* Rely on strtoull() to process the numeric portion. */ errno = 0; *num = strtoull(value, &end, 10); /* * Check for ERANGE, which indicates that the value is too large to fit * in a 64-bit value. 
*/ if (errno == ERANGE) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } /* * If we have a decimal value, then do the computation with floating * point arithmetic. Otherwise, use standard arithmetic. */ if (*end == '.') { double fval = strtod(value, &end); if ((shift = str2shift(hdl, end)) == -1) return (-1); fval *= pow(2, shift); /* * UINT64_MAX is not exactly representable as a double. * The closest representation is UINT64_MAX + 1, so we * use a >= comparison instead of > for the bounds check. */ if (fval >= (double)UINT64_MAX) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } *num = (uint64_t)fval; } else { if ((shift = str2shift(hdl, end)) == -1) return (-1); /* Check for overflow */ if (shift >= 64 || (*num << shift) >> shift != *num) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } *num <<= shift; } return (0); } /* * Given a propname=value nvpair to set, parse any numeric properties * (index, boolean, etc) if they are specified as strings and add the * resulting nvpair to the returned nvlist. * * At the DSL layer, all properties are either 64-bit numbers or strings. * We want the user to be able to ignore this fact and specify properties * as native values (numbers, for example) or as strings (to simplify * command line utilities). This also handles converting index types * (compression, checksum, etc) from strings to their on-disk index. 
*/
/*
 * On success the parsed value is added to 'ret' under the property's name and
 * also returned through *svalp (string properties) or *ivalp (number and
 * index properties).  On a parse failure EZFS_BADPROP is raised with 'errbuf'
 * and -1 is returned.
 */
int
zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
    zfs_type_t type, nvlist_t *ret, char **svalp, uint64_t *ivalp,
    const char *errbuf)
{
	data_type_t datatype = nvpair_type(elem);
	zprop_type_t proptype;
	const char *propname;
	char *value;
	boolean_t isnone = B_FALSE;
	boolean_t isauto = B_FALSE;
	int err = 0;

	if (type == ZFS_TYPE_POOL) {
		proptype = zpool_prop_get_type(prop);
		propname = zpool_prop_to_name(prop);
	} else {
		proptype = zfs_prop_get_type(prop);
		propname = zfs_prop_to_name(prop);
	}

	/*
	 * Convert any properties to the internal DSL value types.
	 */
	*svalp = NULL;
	*ivalp = 0;

	switch (proptype) {
	case PROP_TYPE_STRING:
		if (datatype != DATA_TYPE_STRING) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "'%s' must be a string"), nvpair_name(elem));
			goto error;
		}
		err = nvpair_value_string(elem, svalp);
		if (err != 0) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "'%s' is invalid"), nvpair_name(elem));
			goto error;
		}
		if (strlen(*svalp) >= ZFS_MAXPROPLEN) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "'%s' is too long"), nvpair_name(elem));
			goto error;
		}
		break;

	case PROP_TYPE_NUMBER:
		if (datatype == DATA_TYPE_STRING) {
			(void) nvpair_value_string(elem, &value);
			/* "none" and "auto" are resolved further down. */
			if (strcmp(value, "none") == 0) {
				isnone = B_TRUE;
			} else if (strcmp(value, "auto") == 0) {
				isauto = B_TRUE;
			} else if (zfs_nicestrtonum(hdl, value, ivalp) != 0) {
				goto error;
			}
		} else if (datatype == DATA_TYPE_UINT64) {
			(void) nvpair_value_uint64(elem, ivalp);
		} else {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "'%s' must be a number"), nvpair_name(elem));
			goto error;
		}

		/*
		 * Quota special: force 'none' and don't allow 0.
		 */
		if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone &&
		    (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "use 'none' to disable quota/refquota"));
			goto error;
		}

		/*
		 * Special handling for "*_limit=none". In this case it's not
		 * 0 but UINT64_MAX.
		 */
		if ((type & ZFS_TYPE_DATASET) && isnone &&
		    (prop == ZFS_PROP_FILESYSTEM_LIMIT ||
		    prop == ZFS_PROP_SNAPSHOT_LIMIT)) {
			*ivalp = UINT64_MAX;
		}

		/*
		 * Special handling for setting 'refreservation' to 'auto'.  Use
		 * UINT64_MAX to tell the caller to use zfs_fix_auto_resv().
		 * 'auto' is only allowed on volumes.
		 */
		if (isauto) {
			switch (prop) {
			case ZFS_PROP_REFRESERVATION:
				if ((type & ZFS_TYPE_VOLUME) == 0) {
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "'%s=auto' only allowed on "
					    "volumes"), nvpair_name(elem));
					goto error;
				}
				*ivalp = UINT64_MAX;
				break;
			default:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'auto' is invalid value for '%s'"),
				    nvpair_name(elem));
				goto error;
			}
		}

		break;

	case PROP_TYPE_INDEX:
		if (datatype != DATA_TYPE_STRING) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "'%s' must be a string"), nvpair_name(elem));
			goto error;
		}

		(void) nvpair_value_string(elem, &value);

		if (zprop_string_to_index(prop, value, ivalp, type) != 0) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "'%s' must be one of '%s'"), propname,
			    zprop_values(prop, type));
			goto error;
		}
		break;

	default:
		abort();
	}

	/*
	 * Add the result to our return set of properties.
	 */
	if (*svalp != NULL) {
		if (nvlist_add_string(ret, propname, *svalp) != 0) {
			(void) no_memory(hdl);
			return (-1);
		}
	} else {
		if (nvlist_add_uint64(ret, propname, *ivalp) != 0) {
			(void) no_memory(hdl);
			return (-1);
		}
	}

	return (0);
error:
	(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
	return (-1);
}

/*
 * Allocate a property list entry for 'propname' and store it in *listp.
 * Names with no property table entry are kept as user properties when the
 * checks below allow them.  Returns 0 on success and non-zero on failure.
 */
static int
addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
    zfs_type_t type)
{
	int prop;
	zprop_list_t *entry;

	prop = zprop_name_to_prop(propname, type);

	/* A known property that is not valid for this type counts as unknown */
	if (prop != ZPROP_INVAL && !zprop_valid_for_type(prop, type, B_FALSE))
		prop = ZPROP_INVAL;

	/*
	 * When no property table entry can be found, return failure if
	 * this is a pool property or if this isn't a user-defined
	 * dataset property,
	 */
	if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL &&
	    !zpool_prop_feature(propname) &&
	    !zpool_prop_unsupported(propname)) ||
	    (type == ZFS_TYPE_DATASET && !zfs_prop_user(propname) &&
	    !zfs_prop_userquota(propname) && !zfs_prop_written(propname)))) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "invalid property '%s'"), propname);
		return (zfs_error(hdl, EZFS_BADPROP,
		    dgettext(TEXT_DOMAIN, "bad property list")));
	}

	if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL)
		return (-1);

	entry->pl_prop = prop;
	if (prop == ZPROP_INVAL) {
		if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) ==
		    NULL) {
			free(entry);
			return (-1);
		}
		entry->pl_width = strlen(propname);
	} else {
		entry->pl_width = zprop_width(prop, &entry->pl_fixed,
		    type);
	}

	*listp = entry;

	return (0);
}

/*
 * Given a comma-separated list of properties, construct a property list
 * containing both user-defined and native properties.  This function will
 * return a NULL list if 'all' is specified, which can later be expanded
 * by zprop_expand_list().
 *
 * Note that 'props' is modified in place: each separator that is reached is
 * overwritten with a NUL terminator and is not restored.
 */
int
zprop_get_list(libzfs_handle_t *hdl, char *props, zprop_list_t **listp,
    zfs_type_t type)
{
	*listp = NULL;

	/*
	 * If 'all' is specified, return a NULL list.
	 */
	if (strcmp(props, "all") == 0)
		return (0);

	/*
	 * If no props were specified, return an error.
	 */
	if (props[0] == '\0') {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "no properties specified"));
		return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
		    "bad property list")));
	}

	/*
	 * It would be nice to use getsubopt() here, but the inclusion of column
	 * aliases makes this more effort than it's worth.
	 */
	while (*props != '\0') {
		size_t len;
		char *p;
		char c;

		if ((p = strchr(props, ',')) == NULL) {
			len = strlen(props);
			p = props + len;
		} else {
			len = p - props;
		}

		/*
		 * Check for empty options.
		 */
		if (len == 0) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "empty property name"));
			return (zfs_error(hdl, EZFS_BADPROP,
			    dgettext(TEXT_DOMAIN, "bad property list")));
		}

		/*
		 * Check all regular property names.
		 */
		c = props[len];
		props[len] = '\0';

		/* "space" is shorthand for the fixed set of columns below. */
		if (strcmp(props, "space") == 0) {
			static char *spaceprops[] = {
				"name", "avail", "used", "usedbysnapshots",
				"usedbydataset", "usedbyrefreservation",
				"usedbychildren", NULL
			};
			int i;

			for (i = 0; spaceprops[i]; i++) {
				if (addlist(hdl, spaceprops[i], listp, type))
					return (-1);
				listp = &(*listp)->pl_next;
			}
		} else {
			if (addlist(hdl, props, listp, type))
				return (-1);
			listp = &(*listp)->pl_next;
		}

		/* Step past the separator, if there was one. */
		props = p;
		if (c == ',')
			props++;
	}

	return (0);
}

/* Free every entry of a property list, including user property names. */
void
zprop_free_list(zprop_list_t *pl)
{
	zprop_list_t *next;

	while (pl != NULL) {
		next = pl->pl_next;
		free(pl->pl_user_prop);
		free(pl);
		pl = next;
	}
}

/* State handed to zprop_expand_list_cb() while expanding an 'all' list. */
typedef struct expand_data {
	zprop_list_t	**last;		/* where the next entry is linked in */
	libzfs_handle_t	*hdl;
	zfs_type_t	type;
} expand_data_t;

/*
 * Property iterator callback: append an entry for 'prop' at the tail of the
 * list being built.  Returns ZPROP_INVAL on allocation failure, ZPROP_CONT
 * otherwise.
 */
static int
zprop_expand_list_cb(int prop, void *cb)
{
	zprop_list_t *entry;
	expand_data_t *edp = cb;

	if ((entry = zfs_alloc(edp->hdl, sizeof (zprop_list_t))) == NULL)
		return (ZPROP_INVAL);

	entry->pl_prop = prop;
	entry->pl_width = zprop_width(prop, &entry->pl_fixed, edp->type);
	entry->pl_all = B_TRUE;

	*(edp->last) = entry;
	edp->last = &entry->pl_next;

	return (ZPROP_CONT);
}

/*
 * Expand an empty ('all') property list into a list of the properties
 * visited by zprop_iter_common(), with 'name' at the front.  A non-empty
 * list is left untouched.
 * Returns 0 on success and -1 on failure.
 */
int
zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type)
{
	zprop_list_t *entry;
	zprop_list_t **last;
	expand_data_t exp;

	if (*plp == NULL) {
		/*
		 * If this is the very first time we've been called for an 'all'
		 * specification, expand the list to include all native
		 * properties.
		 */
		last = plp;

		exp.last = last;
		exp.hdl = hdl;
		exp.type = type;

		if (zprop_iter_common(zprop_expand_list_cb, &exp, B_FALSE,
		    B_FALSE, type) == ZPROP_INVAL)
			return (-1);

		/*
		 * Add 'name' to the beginning of the list, which is handled
		 * specially.
		 */
		if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL)
			return (-1);

		entry->pl_prop = (type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME :
		    ZFS_PROP_NAME;
		entry->pl_width = zprop_width(entry->pl_prop,
		    &entry->pl_fixed, type);
		entry->pl_all = B_TRUE;
		entry->pl_next = *plp;
		*plp = entry;
	}
	return (0);
}

/* Public wrapper around zprop_iter_common(); arguments pass straight through */
int
zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered,
    zfs_type_t type)
{
	return (zprop_iter_common(func, cb, show_all, ordered, type));
}

/*
 * Fill given version buffer with zfs userland version
 */
void
zfs_version_userland(char *version, int len)
{
	(void) strlcpy(version, ZFS_META_ALIAS, len);
}

/*
 * Prints both zfs userland and kernel versions
 * Returns 0 on success, and -1 on error (with errno set)
 */
int
zfs_version_print(void)
{
	char zver_userland[128];
	char zver_kernel[128];

	zfs_version_userland(zver_userland, sizeof (zver_userland));

	(void) printf("%s\n", zver_userland);

	if (zfs_version_kernel(zver_kernel, sizeof (zver_kernel)) == -1) {
		fprintf(stderr, "zfs_version_kernel() failed: %s\n",
		    strerror(errno));
		return (-1);
	}

	(void) printf("zfs-kmod-%s\n", zver_kernel);

	return (0);
}

/*
 * Return 1 if the user requested ANSI color output, and our terminal supports
 * it.  Return 0 for no color.
 */
int
use_color(void)
{
	static int use_color = -1;
	char *term;

	/*
	 * Optimization:
	 *
	 * For each zpool invocation, we do a single check to see if we should
	 * be using color or not, and cache that value for the lifetime of
	 * the zpool command.  That makes it cheap to call use_color() when
	 * we're printing with color.  We assume that the settings are not going
	 * to change during the invocation of a zpool command (the user isn't
	 * going to change the ZFS_COLOR value while zpool is running, for
	 * example).
	 */
	if (use_color != -1) {
		/*
		 * We've already figured out if we should be using color or
		 * not.  Return the cached value.
		 */
		return (use_color);
	}

	term = getenv("TERM");
	/*
	 * The user sets the ZFS_COLOR env var to enable zpool ANSI color
	 * output.  However if NO_COLOR is set (https://no-color.org/) then
	 * don't use it.  Also, don't use color if terminal doesn't support
	 * it.
	 */
	if (libzfs_envvar_is_set("ZFS_COLOR") &&
	    !libzfs_envvar_is_set("NO_COLOR") &&
	    isatty(STDOUT_FILENO) && term && strcmp("dumb", term) != 0 &&
	    strcmp("unknown", term) != 0) {
		/* Color supported */
		use_color = 1;
	} else {
		use_color = 0;
	}

	return (use_color);
}

/*
 * The functions color_start() and color_end() are used for when you want
 * to colorize a block of text.
 *
 * For example:
 * color_start(ANSI_RED)
 * printf("hello");
 * printf("world");
 * color_end();
 */
void
color_start(const char *color)
{
	if (color && use_color()) {
		fputs(color, stdout);
		fflush(stdout);
	}
}

/* Emit the ANSI reset sequence if color output is in use. */
void
color_end(void)
{
	if (use_color()) {
		fputs(ANSI_RESET, stdout);
		fflush(stdout);
	}

}

/*
 * printf() with a color.  If color is NULL, then do a normal printf.
 * Returns the value returned by vprintf().
 */
int
printf_color(const char *color, char *format, ...)
{
	va_list aptr;
	int rc;

	if (color)
		color_start(color);

	va_start(aptr, format);
	rc = vprintf(format, aptr);
	va_end(aptr);

	if (color)
		color_end();

	return (rc);
}
+
+/* PATH + 5 env vars + a NULL entry = 7 */
+#define ZPOOL_VDEV_SCRIPT_ENV_COUNT 7
+
+/*
+ * There's a few places where ZFS will call external scripts (like the script
+ * in zpool.d/ and `zfs_prepare_disk`).  These scripts are called with a
+ * reduced $PATH, and some vdev specific environment vars set.  This function
+ * will allocate and populate the environment variable array that is passed to
+ * these scripts.
The user must free the arrays with zpool_vdev_free_env() when + * they are done. + * + * The following env vars will be set (but value could be blank): + * + * POOL_NAME + * VDEV_PATH + * VDEV_UPATH + * VDEV_ENC_SYSFS_PATH + * + * In addition, you can set an optional environment variable named 'opt_key' + * to 'opt_val' if you want. + * + * Returns allocated env[] array on success, NULL otherwise. + */ +char ** +zpool_vdev_script_alloc_env(const char *pool_name, + const char *vdev_path, const char *vdev_upath, + const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val) +{ + char **env = NULL; + int rc; + + env = calloc(ZPOOL_VDEV_SCRIPT_ENV_COUNT, sizeof (*env)); + if (!env) + return (NULL); + + env[0] = strdup("PATH=/bin:/sbin:/usr/bin:/usr/sbin"); + if (!env[0]) + goto error; + + /* Setup our custom environment variables */ + rc = asprintf(&env[1], "POOL_NAME=%s", pool_name ? pool_name : ""); + if (rc == -1) { + env[1] = NULL; + goto error; + } + + rc = asprintf(&env[2], "VDEV_PATH=%s", vdev_path ? vdev_path : ""); + if (rc == -1) { + env[2] = NULL; + goto error; + } + + rc = asprintf(&env[3], "VDEV_UPATH=%s", vdev_upath ? vdev_upath : ""); + if (rc == -1) { + env[3] = NULL; + goto error; + } + + rc = asprintf(&env[4], "VDEV_ENC_SYSFS_PATH=%s", + vdev_enc_sysfs_path ? vdev_enc_sysfs_path : ""); + if (rc == -1) { + env[4] = NULL; + goto error; + } + + if (opt_key != NULL) { + rc = asprintf(&env[5], "%s=%s", opt_key, + opt_val ? opt_val : ""); + if (rc == -1) { + env[5] = NULL; + goto error; + } + } + + return (env); + +error: + for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++) + free(env[i]); + + free(env); + + return (NULL); +} + +/* + * Free the env[] array that was allocated by zpool_vdev_script_alloc_env(). + */ +void +zpool_vdev_script_free_env(char **env) +{ + for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++) + free(env[i]); + + free(env); +} + +/* + * Prepare a disk by (optionally) running a program before labeling the disk. 
+ * This can be useful for installing disk firmware or doing some pre-flight + * checks on the disk before it becomes part of the pool. The program run is + * located at ZFSEXECDIR/zfs_prepare_disk + * (E.x: /usr/local/libexec/zfs/zfs_prepare_disk). + * + * Return 0 on success, non-zero on failure. + */ +int +zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv, + const char *prepare_str, char **lines[], int *lines_cnt) +{ + const char *script_path = ZFSEXECDIR "/zfs_prepare_disk"; + const char *pool_name; + int rc = 0; + + /* Path to script and a NULL entry */ + char *argv[2] = {(char *)script_path}; + char **env = NULL; + char *path = NULL, *enc_sysfs_path = NULL; + char *upath; + *lines_cnt = 0; + + if (access(script_path, X_OK) != 0) { + /* No script, nothing to do */ + return (0); + } + + (void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH, &path); + (void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + &enc_sysfs_path); + + upath = zfs_get_underlying_path(path); + pool_name = zhp ? zpool_get_name(zhp) : NULL; + + env = zpool_vdev_script_alloc_env(pool_name, path, upath, + enc_sysfs_path, "VDEV_PREPARE", prepare_str); + + free(upath); + + if (env == NULL) { + return (ENOMEM); + } + + rc = libzfs_run_process_get_stdout(script_path, argv, env, lines, + lines_cnt); + + zpool_vdev_script_free_env(env); + + return (rc); +} + +/* + * Optionally run a script and then label a disk. The script can be used to + * prepare a disk for inclusion into the pool. For example, it might update + * the disk's firmware or check its health. + * + * The 'name' provided is the short name, stripped of any leading + * /dev path, and is passed to zpool_label_disk. vdev_nv is the nvlist for + * the vdev. prepare_str is a string that gets passed as the VDEV_PREPARE + * env variable to the script. 
+ * + * The following env vars are passed to the script: + * + * POOL_NAME: The pool name (blank during zpool create) + * VDEV_PREPARE: Reason why the disk is being prepared for inclusion: + * "create", "add", "replace", or "autoreplace" + * VDEV_PATH: Path to the disk + * VDEV_UPATH: One of the 'underlying paths' to the disk. This is + * useful for DM devices. + * VDEV_ENC_SYSFS_PATH: Path to the disk's enclosure sysfs path, if available. + * + * Note, some of these values can be blank. + * + * Return 0 on success, non-zero otherwise. + */ +int +zpool_prepare_and_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, + const char *name, nvlist_t *vdev_nv, const char *prepare_str, + char **lines[], int *lines_cnt) +{ + int rc; + char vdev_path[MAXPATHLEN]; + (void) snprintf(vdev_path, sizeof (vdev_path), "%s/%s", DISK_ROOT, + name); + + /* zhp will be NULL when creating a pool */ + rc = zpool_prepare_disk(zhp, vdev_nv, prepare_str, lines, lines_cnt); + if (rc != 0) + return (rc); + + rc = zpool_label_disk(hdl, zhp, name); + return (rc); +} diff --git a/man/Makefile.am b/man/Makefile.am index 64650c2b988a..2608461625d4 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -1,118 +1,119 @@ include $(top_srcdir)/config/Substfiles.am EXTRA_DIST += \ man1/cstyle.1 dist_man_MANS = \ man1/zhack.1 \ man1/ztest.1 \ man1/raidz_test.1 \ man1/zvol_wait.1 \ man1/arcstat.1 \ \ man5/vdev_id.conf.5 \ \ man4/spl.4 \ man4/zfs.4 \ \ man7/dracut.zfs.7 \ man7/zpool-features.7 \ man7/zfsconcepts.7 \ man7/zfsprops.7 \ man7/zpoolconcepts.7 \ man7/zpoolprops.7 \ \ man8/fsck.zfs.8 \ man8/mount.zfs.8 \ man8/vdev_id.8 \ man8/zdb.8 \ man8/zfs.8 \ man8/zfs-allow.8 \ man8/zfs-bookmark.8 \ man8/zfs-change-key.8 \ man8/zfs-clone.8 \ man8/zfs-create.8 \ man8/zfs-destroy.8 \ man8/zfs-diff.8 \ man8/zfs-get.8 \ man8/zfs-groupspace.8 \ man8/zfs-hold.8 \ man8/zfs-inherit.8 \ man8/zfs-jail.8 \ man8/zfs-list.8 \ man8/zfs-load-key.8 \ man8/zfs-mount.8 \ man8/zfs-program.8 \ man8/zfs-project.8 \ 
man8/zfs-projectspace.8 \ man8/zfs-promote.8 \ man8/zfs-receive.8 \ man8/zfs-recv.8 \ man8/zfs-redact.8 \ man8/zfs-release.8 \ man8/zfs-rename.8 \ man8/zfs-rollback.8 \ man8/zfs-send.8 \ man8/zfs-set.8 \ man8/zfs-share.8 \ man8/zfs-snapshot.8 \ man8/zfs-unallow.8 \ man8/zfs-unjail.8 \ man8/zfs-unload-key.8 \ man8/zfs-unmount.8 \ man8/zfs-upgrade.8 \ man8/zfs-userspace.8 \ man8/zfs-wait.8 \ man8/zfs_ids_to_path.8 \ man8/zgenhostid.8 \ man8/zinject.8 \ man8/zpool.8 \ man8/zpool-add.8 \ man8/zpool-attach.8 \ man8/zpool-checkpoint.8 \ man8/zpool-clear.8 \ man8/zpool-create.8 \ man8/zpool-destroy.8 \ man8/zpool-detach.8 \ man8/zpool-events.8 \ man8/zpool-export.8 \ man8/zpool-get.8 \ man8/zpool-history.8 \ man8/zpool-import.8 \ man8/zpool-initialize.8 \ man8/zpool-iostat.8 \ man8/zpool-labelclear.8 \ man8/zpool-list.8 \ man8/zpool-offline.8 \ man8/zpool-online.8 \ man8/zpool-reguid.8 \ man8/zpool-remove.8 \ man8/zpool-reopen.8 \ man8/zpool-replace.8 \ man8/zpool-resilver.8 \ man8/zpool-scrub.8 \ man8/zpool-set.8 \ man8/zpool-split.8 \ man8/zpool-status.8 \ man8/zpool-sync.8 \ man8/zpool-trim.8 \ man8/zpool-upgrade.8 \ man8/zpool-wait.8 \ man8/zstream.8 \ man8/zstreamdump.8 \ man8/zpool_influxdb.8 nodist_man_MANS = \ man8/zed.8 \ - man8/zfs-mount-generator.8 + man8/zfs-mount-generator.8 \ + man8/zfs_prepare_disk.8 SUBSTFILES += $(nodist_man_MANS) if BUILD_LINUX # The manual pager in most Linux distros defaults to "BSD" when .Os is blank, # but leaving it blank makes things a lot easier on # FreeBSD when OpenZFS is vendored in the base system. 
install-data-hook: cd $(DESTDIR)$(mandir) && $(SED) ${ac_inplace} -e 's/^\.Os$$/.Os OpenZFS/' $(dist_man_MANS) $(nodist_man_MANS) endif diff --git a/man/man8/.gitignore b/man/man8/.gitignore index f2fc702147e9..a468f9cbf9d3 100644 --- a/man/man8/.gitignore +++ b/man/man8/.gitignore @@ -1,2 +1,3 @@ /zed.8 /zfs-mount-generator.8 +/zfs_prepare_disk.8 diff --git a/man/man8/zfs_prepare_disk.8.in b/man/man8/zfs_prepare_disk.8.in new file mode 100644 index 000000000000..2a741531e415 --- /dev/null +++ b/man/man8/zfs_prepare_disk.8.in @@ -0,0 +1,70 @@ +.\" +.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). +.\" Copyright (C) 2023 Lawrence Livermore National Security, LLC. +.\" Refer to the OpenZFS git commit log for authoritative copyright attribution. +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License Version 1.0 (CDDL-1.0). +.\" You can obtain a copy of the license from the top-level file +.\" "OPENSOLARIS.LICENSE" or at <http://www.opensolaris.org/os/licensing>. +.\" You may not use this file except in compliance with the license. +.\" +.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) +.\" +.Dd August 30, 2023 +.Dt ZFS_PREPARE_DISK 8 +.Os +. +.Sh NAME +.Nm zfs_prepare_disk +.Nd special script that gets run before bringing a disk into a pool +.Sh DESCRIPTION +.Nm +is an optional script that gets called by libzfs before bringing a disk into a +pool. +It can be modified by the user to run whatever commands are necessary to prepare +a disk for inclusion into the pool. +For example, users can add lines to +.Nm zfs_prepare_disk +to do things like update the drive's firmware or check the drive's health. +.Nm zfs_prepare_disk +is optional and can be removed if not needed. +libzfs will look for the script at @zfsexecdir@/zfs_prepare_disk. +. +.Ss Properties +.Nm zfs_prepare_disk +will be passed the following environment variables: +.sp +.Bl -tag -compact -width "VDEV_ENC_SYSFS_PATH" +. 
+.It Nm POOL_NAME +.No Name of the pool +.It Nm VDEV_PATH +.No Path to the disk (like /dev/sda) +.It Nm VDEV_PREPARE +.No Reason why the disk is being prepared for inclusion +('create', 'add', 'replace', or 'autoreplace'). +This can be useful if you only want the script to run for certain actions. +.It Nm VDEV_UPATH +.No Path to one of the underlying devices for the +disk. +For multipath this would return one of the /dev/sd* paths to the disk. +If the device is not a device mapper device, then +.Nm VDEV_UPATH +just returns the same value as +.Nm VDEV_PATH . +.It Nm VDEV_ENC_SYSFS_PATH +.No Sysfs path of the disk's enclosure, if available +.El +.Pp +Note that some of these variables may have a blank value. +.Nm POOL_NAME +is blank at pool creation time, for example. +.Sh ENVIRONMENT +.Nm zfs_prepare_disk +runs with a limited $PATH. +.Sh EXIT STATUS +.Nm zfs_prepare_disk +should return 0 on success, non-zero otherwise. +If non-zero is returned, the disk will not be included in the pool. +. 
diff --git a/scripts/Makefile.am b/scripts/Makefile.am index 047ae7eaca6d..f3c133a29ae8 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -1,90 +1,93 @@ include $(top_srcdir)/config/Shellcheck.am pkgdatadir = $(datadir)/@PACKAGE@ dist_pkgdata_SCRIPTS = \ zimport.sh \ zfs.sh \ zfs-tests.sh \ zloop.sh \ zfs-helpers.sh +dist_zfsexec_SCRIPTS = \ + zfs_prepare_disk + EXTRA_SCRIPTS = \ commitcheck.sh \ common.sh.in \ dkms.mkconf \ dkms.postbuild \ kmodtool \ make_gitrev.sh \ man-dates.sh \ paxcheck.sh \ mancheck.sh EXTRA_DIST = \ cstyle.pl \ enum-extract.pl \ zfs2zol-patch.sed \ zol2zfs-patch.sed \ $(EXTRA_SCRIPTS) SHELLCHECK_IGNORE = ,SC1117,SC2086,SC2295 SHELLCHECKSCRIPTS = $(EXTRA_SCRIPTS) define EXTRA_ENVIRONMENT # Only required for in-tree use export INTREE="yes" export GDB="libtool --mode=execute gdb" export LDMOD=/sbin/insmod export CMD_DIR=@abs_top_builddir@/cmd export UDEV_RULE_DIR=@abs_top_builddir@/udev/rules.d export ZEDLET_ETC_DIR=$$CMD_DIR/zed/zed.d export ZEDLET_LIBEXEC_DIR=$$CMD_DIR/zed/zed.d export ZPOOL_SCRIPT_DIR=$$CMD_DIR/zpool/zpool.d export ZPOOL_SCRIPTS_PATH=$$CMD_DIR/zpool/zpool.d export ZPOOL_COMPAT_DIR=$$CMD_DIR/zpool/compatibility.d export CONTRIB_DIR=@abs_top_builddir@/contrib export LIB_DIR=@abs_top_builddir@/lib export SYSCONF_DIR=@abs_top_builddir@/etc export INSTALL_UDEV_DIR=@udevdir@ export INSTALL_UDEV_RULE_DIR=@udevruledir@ export INSTALL_MOUNT_HELPER_DIR=@mounthelperdir@ export INSTALL_SYSCONF_DIR=@sysconfdir@ export INSTALL_PYTHON_DIR=@pythonsitedir@ export KMOD_SPL=@abs_top_builddir@/module/spl/spl.ko export KMOD_ZAVL=@abs_top_builddir@/module/avl/zavl.ko export KMOD_ZNVPAIR=@abs_top_builddir@/module/nvpair/znvpair.ko export KMOD_ZUNICODE=@abs_top_builddir@/module/unicode/zunicode.ko export KMOD_ZCOMMON=@abs_top_builddir@/module/zcommon/zcommon.ko export KMOD_ZLUA=@abs_top_builddir@/module/lua/zlua.ko export KMOD_ICP=@abs_top_builddir@/module/icp/icp.ko export KMOD_ZFS=@abs_top_builddir@/module/zfs/zfs.ko export 
KMOD_FREEBSD=@abs_top_builddir@/module/openzfs.ko export KMOD_ZZSTD=@abs_top_builddir@/module/zstd/zzstd.ko endef export EXTRA_ENVIRONMENT all-local: -$(SED) -e '\|^export BIN_DIR=|s|$$|@abs_top_builddir@/bin|' \ -e '\|^export SBIN_DIR=|s|$$|@abs_top_builddir@/bin|' \ -e '\|^export LIBEXEC_DIR=|s|$$|@abs_top_builddir@/bin|' \ -e '\|^export ZTS_DIR=|s|$$|@abs_top_srcdir@/tests|' \ -e '\|^export SCRIPT_DIR=|s|$$|@abs_top_srcdir@/scripts|' \ $(abs_top_srcdir)/scripts/common.sh.in >common.sh -echo "$$EXTRA_ENVIRONMENT" >>common.sh clean-local: -$(RM) common.sh install-data-hook: -$(SED) -e '\|^export BIN_DIR=|s|$$|@bindir@|' \ -e '\|^export SBIN_DIR=|s|$$|@sbindir@|' \ -e '\|^export LIBEXEC_DIR=|s|$$|@zfsexecdir@|' \ -e '\|^export ZTS_DIR=|s|$$|@datadir@/@PACKAGE@|' \ -e '\|^export SCRIPT_DIR=|s|$$|@datadir@/@PACKAGE@|' \ $(abs_top_srcdir)/scripts/common.sh.in \ >$(DESTDIR)$(datadir)/@PACKAGE@/common.sh diff --git a/scripts/zfs_prepare_disk b/scripts/zfs_prepare_disk new file mode 100755 index 000000000000..02aa9f8a7728 --- /dev/null +++ b/scripts/zfs_prepare_disk @@ -0,0 +1,17 @@ +#!/bin/sh +# +# This is an optional helper script that is automatically called by libzfs +# before a disk is about to be added into the pool. It can be modified by +# the user to run whatever commands are necessary to prepare a disk for +# inclusion into the pool. For example, users can add lines to this +# script to do things like update the drive's firmware or check the drive's +# health. The script is optional and can be removed if it is not needed. +# +# See the zfs_prepare_disk(8) man page for details. +# +# Example: +# +# echo "Prepare disk $VDEV_PATH ($VDEV_UPATH) for $VDEV_PREPARE in $POOL_NAME" +# + +exit 0