diff --git a/cmd/zpool/os/freebsd/zpool_vdev_os.c b/cmd/zpool/os/freebsd/zpool_vdev_os.c index aa66d29fa604..2e0faa7ecad2 100644 --- a/cmd/zpool/os/freebsd/zpool_vdev_os.c +++ b/cmd/zpool/os/freebsd/zpool_vdev_os.c @@ -1,118 +1,132 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2018 by Delphix. All rights reserved. * Copyright (c) 2016, 2017 Intel Corporation. * Copyright 2016 Igor Kozhukhov . */ /* * Functions to convert between a list of vdevs and an nvlist representing the * configuration. Each entry in the list can be one of: * * Device vdevs * disk=(path=..., devid=...) * file=(path=...) * * Group vdevs * raidz[1|2]=(...) * mirror=(...) * * Hot spares * * While the underlying implementation supports it, group vdevs cannot contain * other group vdevs. All userland verification of devices is contained within * this file. If successful, the nvlist returned can be passed directly to the * kernel; we've done as much verification as possible in userland. * * Hot spares are a special case, and passed down as an array of disk vdevs, at * the same level as the root of the vdev tree. 
* * The only function exported by this file is 'make_root_vdev'. The * function performs several passes: * * 1. Construct the vdev specification. Performs syntax validation and * makes sure each device is valid. * 2. Check for devices in use. Using libdiskmgt, makes sure that no * devices are also in use. Some can be overridden using the 'force' * flag, others cannot. * 3. Check for replication errors if the 'force' flag is not specified. * validates that the replication level is consistent across the * entire pool. * 4. Call libzfs to label any whole disks with an EFI label. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zpool_util.h" #include int check_device(const char *name, boolean_t force, boolean_t isspare, boolean_t iswholedisk) { char path[MAXPATHLEN]; if (strncmp(name, _PATH_DEV, sizeof (_PATH_DEV) - 1) != 0) snprintf(path, sizeof (path), "%s%s", _PATH_DEV, name); else strlcpy(path, name, sizeof (path)); return (check_file(path, force, isspare)); } boolean_t check_sector_size_database(char *path, int *sector_size) { return (0); } void after_zpool_upgrade(zpool_handle_t *zhp) { char bootfs[ZPOOL_MAXPROPLEN]; if (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs, sizeof (bootfs), NULL, B_FALSE) == 0 && strcmp(bootfs, "-") != 0) { (void) printf(gettext("Pool '%s' has the bootfs " "property set, you might need to update\nthe boot " "code. 
See gptzfsboot(8) and loader.efi(8) for " "details.\n"), zpool_get_name(zhp)); } } + +int +zpool_power_current_state(zpool_handle_t *zhp, char *vdev) +{ + /* Enclosure slot power not supported on FreeBSD yet */ + return (-1); +} + +int +zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on) +{ + /* Enclosure slot power not supported on FreeBSD yet */ + return (ENOTSUP); +} diff --git a/cmd/zpool/os/linux/zpool_vdev_os.c b/cmd/zpool/os/linux/zpool_vdev_os.c index da87aa79f365..cfaeef56a20e 100644 --- a/cmd/zpool/os/linux/zpool_vdev_os.c +++ b/cmd/zpool/os/linux/zpool_vdev_os.c @@ -1,412 +1,667 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2018 by Delphix. All rights reserved. * Copyright (c) 2016, 2017 Intel Corporation. * Copyright 2016 Igor Kozhukhov . */ /* * Functions to convert between a list of vdevs and an nvlist representing the * configuration. Each entry in the list can be one of: * * Device vdevs * disk=(path=..., devid=...) * file=(path=...) * * Group vdevs * raidz[1|2]=(...) * mirror=(...) 
* * Hot spares * * While the underlying implementation supports it, group vdevs cannot contain * other group vdevs. All userland verification of devices is contained within * this file. If successful, the nvlist returned can be passed directly to the * kernel; we've done as much verification as possible in userland. * * Hot spares are a special case, and passed down as an array of disk vdevs, at * the same level as the root of the vdev tree. * * The only function exported by this file is 'make_root_vdev'. The * function performs several passes: * * 1. Construct the vdev specification. Performs syntax validation and * makes sure each device is valid. * 2. Check for devices in use. Using libblkid to make sure that no * devices are also in use. Some can be overridden using the 'force' * flag, others cannot. * 3. Check for replication errors if the 'force' flag is not specified. * validates that the replication level is consistent across the * entire pool. * 4. Call libzfs to label any whole disks with an EFI label. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "zpool_util.h" #include #include #include #include #include #include #include #include #include typedef struct vdev_disk_db_entry { char id[24]; int sector_size; } vdev_disk_db_entry_t; /* * Database of block devices that lie about physical sector sizes. 
The * identification string must be precisely 24 characters to avoid false * negatives */ static vdev_disk_db_entry_t vdev_disk_database[] = { {"ATA ADATA SSD S396 3", 8192}, {"ATA APPLE SSD SM128E", 8192}, {"ATA APPLE SSD SM256E", 8192}, {"ATA APPLE SSD SM512E", 8192}, {"ATA APPLE SSD SM768E", 8192}, {"ATA C400-MTFDDAC064M", 8192}, {"ATA C400-MTFDDAC128M", 8192}, {"ATA C400-MTFDDAC256M", 8192}, {"ATA C400-MTFDDAC512M", 8192}, {"ATA Corsair Force 3 ", 8192}, {"ATA Corsair Force GS", 8192}, {"ATA INTEL SSDSA2CT04", 8192}, {"ATA INTEL SSDSA2BZ10", 8192}, {"ATA INTEL SSDSA2BZ20", 8192}, {"ATA INTEL SSDSA2BZ30", 8192}, {"ATA INTEL SSDSA2CW04", 8192}, {"ATA INTEL SSDSA2CW08", 8192}, {"ATA INTEL SSDSA2CW12", 8192}, {"ATA INTEL SSDSA2CW16", 8192}, {"ATA INTEL SSDSA2CW30", 8192}, {"ATA INTEL SSDSA2CW60", 8192}, {"ATA INTEL SSDSC2CT06", 8192}, {"ATA INTEL SSDSC2CT12", 8192}, {"ATA INTEL SSDSC2CT18", 8192}, {"ATA INTEL SSDSC2CT24", 8192}, {"ATA INTEL SSDSC2CW06", 8192}, {"ATA INTEL SSDSC2CW12", 8192}, {"ATA INTEL SSDSC2CW18", 8192}, {"ATA INTEL SSDSC2CW24", 8192}, {"ATA INTEL SSDSC2CW48", 8192}, {"ATA KINGSTON SH100S3", 8192}, {"ATA KINGSTON SH103S3", 8192}, {"ATA M4-CT064M4SSD2 ", 8192}, {"ATA M4-CT128M4SSD2 ", 8192}, {"ATA M4-CT256M4SSD2 ", 8192}, {"ATA M4-CT512M4SSD2 ", 8192}, {"ATA OCZ-AGILITY2 ", 8192}, {"ATA OCZ-AGILITY3 ", 8192}, {"ATA OCZ-VERTEX2 3.5 ", 8192}, {"ATA OCZ-VERTEX3 ", 8192}, {"ATA OCZ-VERTEX3 LT ", 8192}, {"ATA OCZ-VERTEX3 MI ", 8192}, {"ATA OCZ-VERTEX4 ", 8192}, {"ATA SAMSUNG MZ7WD120", 8192}, {"ATA SAMSUNG MZ7WD240", 8192}, {"ATA SAMSUNG MZ7WD480", 8192}, {"ATA SAMSUNG MZ7WD960", 8192}, {"ATA SAMSUNG SSD 830 ", 8192}, {"ATA Samsung SSD 840 ", 8192}, {"ATA SanDisk SSD U100", 8192}, {"ATA TOSHIBA THNSNH06", 8192}, {"ATA TOSHIBA THNSNH12", 8192}, {"ATA TOSHIBA THNSNH25", 8192}, {"ATA TOSHIBA THNSNH51", 8192}, {"ATA APPLE SSD TS064C", 4096}, {"ATA APPLE SSD TS128C", 4096}, {"ATA APPLE SSD TS256C", 4096}, {"ATA APPLE SSD TS512C", 4096}, {"ATA INTEL 
SSDSA2M040", 4096}, {"ATA INTEL SSDSA2M080", 4096}, {"ATA INTEL SSDSA2M160", 4096}, {"ATA INTEL SSDSC2MH12", 4096}, {"ATA INTEL SSDSC2MH25", 4096}, {"ATA OCZ CORE_SSD ", 4096}, {"ATA OCZ-VERTEX ", 4096}, {"ATA SAMSUNG MCCOE32G", 4096}, {"ATA SAMSUNG MCCOE64G", 4096}, {"ATA SAMSUNG SSD PM80", 4096}, /* Flash drives optimized for 4KB IOs on larger pages */ {"ATA INTEL SSDSC2BA10", 4096}, {"ATA INTEL SSDSC2BA20", 4096}, {"ATA INTEL SSDSC2BA40", 4096}, {"ATA INTEL SSDSC2BA80", 4096}, {"ATA INTEL SSDSC2BB08", 4096}, {"ATA INTEL SSDSC2BB12", 4096}, {"ATA INTEL SSDSC2BB16", 4096}, {"ATA INTEL SSDSC2BB24", 4096}, {"ATA INTEL SSDSC2BB30", 4096}, {"ATA INTEL SSDSC2BB40", 4096}, {"ATA INTEL SSDSC2BB48", 4096}, {"ATA INTEL SSDSC2BB60", 4096}, {"ATA INTEL SSDSC2BB80", 4096}, {"ATA INTEL SSDSC2BW24", 4096}, {"ATA INTEL SSDSC2BW48", 4096}, {"ATA INTEL SSDSC2BP24", 4096}, {"ATA INTEL SSDSC2BP48", 4096}, {"NA SmrtStorSDLKAE9W", 4096}, {"NVMe Amazon EC2 NVMe ", 4096}, /* Imported from Open Solaris */ {"ATA MARVELL SD88SA02", 4096}, /* Advanced format Hard drives */ {"ATA Hitachi HDS5C303", 4096}, {"ATA SAMSUNG HD204UI ", 4096}, {"ATA ST2000DL004 HD20", 4096}, {"ATA WDC WD10EARS-00M", 4096}, {"ATA WDC WD10EARS-00S", 4096}, {"ATA WDC WD10EARS-00Z", 4096}, {"ATA WDC WD15EARS-00M", 4096}, {"ATA WDC WD15EARS-00S", 4096}, {"ATA WDC WD15EARS-00Z", 4096}, {"ATA WDC WD20EARS-00M", 4096}, {"ATA WDC WD20EARS-00S", 4096}, {"ATA WDC WD20EARS-00Z", 4096}, {"ATA WDC WD1600BEVT-0", 4096}, {"ATA WDC WD2500BEVT-0", 4096}, {"ATA WDC WD3200BEVT-0", 4096}, {"ATA WDC WD5000BEVT-0", 4096}, }; #define INQ_REPLY_LEN 96 #define INQ_CMD_LEN 6 static const int vdev_disk_database_size = sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]); boolean_t check_sector_size_database(char *path, int *sector_size) { unsigned char inq_buff[INQ_REPLY_LEN]; unsigned char sense_buffer[32]; unsigned char inq_cmd_blk[INQ_CMD_LEN] = {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0}; sg_io_hdr_t io_hdr; int error; int fd; int i; /* 
Prepare INQUIRY command */ memset(&io_hdr, 0, sizeof (sg_io_hdr_t)); io_hdr.interface_id = 'S'; io_hdr.cmd_len = sizeof (inq_cmd_blk); io_hdr.mx_sb_len = sizeof (sense_buffer); io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; io_hdr.dxfer_len = INQ_REPLY_LEN; io_hdr.dxferp = inq_buff; io_hdr.cmdp = inq_cmd_blk; io_hdr.sbp = sense_buffer; io_hdr.timeout = 10; /* 10 milliseconds is ample time */ if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0) return (B_FALSE); error = ioctl(fd, SG_IO, (unsigned long) &io_hdr); (void) close(fd); if (error < 0) return (B_FALSE); if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK) return (B_FALSE); for (i = 0; i < vdev_disk_database_size; i++) { if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24)) continue; *sector_size = vdev_disk_database[i].sector_size; return (B_TRUE); } return (B_FALSE); } static int check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare) { int err; char *value; /* No valid type detected device is safe to use */ value = blkid_get_tag_value(cache, "TYPE", path); if (value == NULL) return (0); /* * If libblkid detects a ZFS device, we check the device * using check_file() to see if it's safe. The one safe * case is a spare device shared between multiple pools. */ if (strcmp(value, "zfs_member") == 0) { err = check_file(path, force, isspare); } else { if (force) { err = 0; } else { err = -1; vdev_error(gettext("%s contains a filesystem of " "type '%s'\n"), path, value); } } free(value); return (err); } /* * Validate that a disk including all partitions are safe to use. * * For EFI labeled disks this can done relatively easily with the libefi * library. The partition numbers are extracted from the label and used * to generate the expected /dev/ paths. Each partition can then be * checked for conflicts. * * For non-EFI labeled disks (MBR/EBR/etc) the same process is possible * but due to the lack of a readily available libraries this scanning is * not implemented. 
Instead only the device path as given is checked. */ static int check_disk(const char *path, blkid_cache cache, int force, boolean_t isspare, boolean_t iswholedisk) { struct dk_gpt *vtoc; char slice_path[MAXPATHLEN]; int err = 0; int fd, i; int flags = O_RDONLY|O_DIRECT; if (!iswholedisk) return (check_slice(path, cache, force, isspare)); /* only spares can be shared, other devices require exclusive access */ if (!isspare) flags |= O_EXCL; if ((fd = open(path, flags)) < 0) { char *value = blkid_get_tag_value(cache, "TYPE", path); (void) fprintf(stderr, gettext("%s is in use and contains " "a %s filesystem.\n"), path, value ? value : "unknown"); free(value); return (-1); } /* * Expected to fail for non-EFI labeled disks. Just check the device * as given and do not attempt to detect and scan partitions. */ err = efi_alloc_and_read(fd, &vtoc); if (err) { (void) close(fd); return (check_slice(path, cache, force, isspare)); } /* * The primary efi partition label is damaged however the secondary * label at the end of the device is intact. Rather than use this * label we should play it safe and treat this as a non efi device. */ if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { efi_free(vtoc); (void) close(fd); if (force) { /* Partitions will now be created using the backup */ return (0); } else { vdev_error(gettext("%s contains a corrupt primary " "EFI label.\n"), path); return (-1); } } for (i = 0; i < vtoc->efi_nparts; i++) { if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED || uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid)) continue; if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) (void) snprintf(slice_path, sizeof (slice_path), "%s%s%d", path, "-part", i+1); else (void) snprintf(slice_path, sizeof (slice_path), "%s%s%d", path, isdigit(path[strlen(path)-1]) ? 
"p" : "", i+1); err = check_slice(slice_path, cache, force, isspare); if (err) break; } efi_free(vtoc); (void) close(fd); return (err); } int check_device(const char *path, boolean_t force, boolean_t isspare, boolean_t iswholedisk) { blkid_cache cache; int error; error = blkid_get_cache(&cache, NULL); if (error != 0) { (void) fprintf(stderr, gettext("unable to access the blkid " "cache.\n")); return (-1); } error = check_disk(path, cache, force, isspare, iswholedisk); blkid_put_cache(cache); return (error); } void after_zpool_upgrade(zpool_handle_t *zhp) { } + +/* + * Read from a sysfs file and return an allocated string. Removes + * the newline from the end of the string if there is one. + * + * Returns a string on success (which must be freed), or NULL on error. + */ +static char *zpool_sysfs_gets(char *path) +{ + int fd; + struct stat statbuf; + char *buf = NULL; + ssize_t count = 0; + fd = open(path, O_RDONLY); + if (fd < 0) + return (NULL); + + if (fstat(fd, &statbuf) != 0) { + close(fd); + return (NULL); + } + + buf = calloc(sizeof (*buf), statbuf.st_size + 1); + if (buf == NULL) { + close(fd); + return (NULL); + } + + /* + * Note, we can read less bytes than st_size, and that's ok. Sysfs + * files will report their size is 4k even if they only return a small + * string. + */ + count = read(fd, buf, statbuf.st_size); + if (count < 0) { + /* Error doing read() or we overran the buffer */ + close(fd); + free(buf); + return (NULL); + } + + /* Remove trailing newline */ + if (buf[count - 1] == '\n') + buf[count - 1] = 0; + + close(fd); + + return (buf); +} + +/* + * Write a string to a sysfs file. + * + * Returns 0 on success, non-zero otherwise. 
+ */ +static int zpool_sysfs_puts(char *path, char *str) +{ + FILE *file; + + file = fopen(path, "w"); + if (!file) { + return (-1); + } + + if (fputs(str, file) < 0) { + (void) fclose(file); + return (-2); + } + + /* + * The stream is buffered, so the string is only handed to sysfs when + * fclose() flushes it. A value rejected by the kernel is therefore + * reported by fclose(), not by fputs(). + */ + if (fclose(file) != 0) + return (-2); + + return (0); +} + +/* Given a vdev nvlist_t, rescan its enclosure sysfs path */ +static void +rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv) +{ + update_vdev_config_dev_sysfs_path(vdev_nv, + fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH), + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); +} + +/* + * Given a power string: "on", "off", "1", or "0", return 0 if it's an + * off value, 1 if it's an on value, and -1 if the value is unrecognized. + */ +static int zpool_power_parse_value(char *str) +{ + if ((strcmp(str, "off") == 0) || (strcmp(str, "0") == 0)) + return (0); + + if ((strcmp(str, "on") == 0) || (strcmp(str, "1") == 0)) + return (1); + + return (-1); +} + +/* + * Given a vdev string return an allocated string containing the sysfs path to + * its power control file. Also do a check if the power control file really + * exists and has correct permissions. + * + * Example returned strings: + * + * /sys/class/enclosure/0:0:122:0/10/power_status + * /sys/bus/pci/slots/10/power + * + * Returns allocated string on success (which must be freed), NULL on failure. + */ +static char * +zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev) +{ + char *enc_sysfs_dir = NULL; + char *path = NULL; + nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL); + + if (vdev_nv == NULL) { + return (NULL); + } + + /* Make sure we're getting the updated enclosure sysfs path */ + rescan_vdev_config_dev_sysfs_path(vdev_nv); + + if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + &enc_sysfs_dir) != 0) { + return (NULL); + } + + if (asprintf(&path, "%s/power_status", enc_sysfs_dir) == -1) + return (NULL); + + if (access(path, W_OK) != 0) { + free(path); + path = NULL; + /* No HDD 'power_status' file, maybe it's NVMe? 
*/ + if (asprintf(&path, "%s/power", enc_sysfs_dir) == -1) { + return (NULL); + } + + if (access(path, R_OK | W_OK) != 0) { + /* Not NVMe either */ + free(path); + return (NULL); + } + } + + return (path); +} + +/* + * Given a path to a sysfs power control file, return B_TRUE if you should use + * "on/off" words to control it, or B_FALSE otherwise ("0/1" to control). + */ +static boolean_t +zpool_power_use_word(char *sysfs_path) +{ + if (strcmp(&sysfs_path[strlen(sysfs_path) - strlen("power_status")], + "power_status") == 0) { + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Check the sysfs power control value for a vdev. + * + * Returns: + * 0 - Power is off + * 1 - Power is on + * -1 - Error or unsupported + */ +int +zpool_power_current_state(zpool_handle_t *zhp, char *vdev) +{ + char *val; + int rc; + + char *path = zpool_power_sysfs_path(zhp, vdev); + if (path == NULL) + return (-1); + + val = zpool_sysfs_gets(path); + if (val == NULL) { + free(path); + return (-1); + } + + rc = zpool_power_parse_value(val); + free(val); + free(path); + return (rc); +} + +/* + * Turn on or off the slot to a device + * + * Device path is the full path to the device (like /dev/sda or /dev/sda1). + * + * Return code: + * 0: Success + * ENOTSUP: Power control not supported for OS + * EBADSLT: Couldn't read current power state + * ENOENT: No sysfs path to power control + * EIO: Couldn't write sysfs power value + * EBADE: Sysfs power value didn't change + */ +int +zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on) +{ + char *sysfs_path; + const char *val; + int rc; + int timeout_ms; + + rc = zpool_power_current_state(zhp, vdev); + if (rc == -1) { + return (EBADSLT); + } + + /* Already correct value? */ + if (rc == (int)turn_on) + return (0); + + sysfs_path = zpool_power_sysfs_path(zhp, vdev); + if (sysfs_path == NULL) + return (ENOENT); + + if (zpool_power_use_word(sysfs_path)) { + val = turn_on ? "on" : "off"; + } else { + val = turn_on ? 
"1" : "0"; + } + + rc = zpool_sysfs_puts(sysfs_path, (char *)val); + + free(sysfs_path); + if (rc != 0) { + return (EIO); + } + + /* + * Wait up to 30 seconds for sysfs power value to change after + * writing it. + */ + timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000); + for (int i = 0; i < MAX(1, timeout_ms / 200); i++) { + rc = zpool_power_current_state(zhp, vdev); + if (rc == (int)turn_on) + return (0); /* success */ + + fsleep(0.200); /* 200ms */ + } + + /* sysfs value never changed */ + return (EBADE); +} diff --git a/cmd/zpool/zpool_iter.c b/cmd/zpool/zpool_iter.c index 486b8a1ac2b5..82250f692700 100644 --- a/cmd/zpool/zpool_iter.c +++ b/cmd/zpool/zpool_iter.c @@ -1,704 +1,708 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2016 Igor Kozhukhov . */ #include #include #include #include #include #include #include #include #include #include #include #include "zpool_util.h" /* * Private interface for iterating over pools specified on the command line. 
* Most consumers will call for_each_pool, but in order to support iostat, we * allow fined grained control through the zpool_list_t interface. */ typedef struct zpool_node { zpool_handle_t *zn_handle; uu_avl_node_t zn_avlnode; int zn_mark; } zpool_node_t; struct zpool_list { boolean_t zl_findall; boolean_t zl_literal; uu_avl_t *zl_avl; uu_avl_pool_t *zl_pool; zprop_list_t **zl_proplist; }; /* ARGSUSED */ static int zpool_compare(const void *larg, const void *rarg, void *unused) { zpool_handle_t *l = ((zpool_node_t *)larg)->zn_handle; zpool_handle_t *r = ((zpool_node_t *)rarg)->zn_handle; const char *lname = zpool_get_name(l); const char *rname = zpool_get_name(r); return (strcmp(lname, rname)); } /* * Callback function for pool_list_get(). Adds the given pool to the AVL tree * of known pools. */ static int add_pool(zpool_handle_t *zhp, void *data) { zpool_list_t *zlp = data; zpool_node_t *node = safe_malloc(sizeof (zpool_node_t)); uu_avl_index_t idx; node->zn_handle = zhp; uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool); if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) { if (zlp->zl_proplist && zpool_expand_proplist(zhp, zlp->zl_proplist, zlp->zl_literal) != 0) { zpool_close(zhp); free(node); return (-1); } uu_avl_insert(zlp->zl_avl, node, idx); } else { zpool_close(zhp); free(node); return (-1); } return (0); } /* * Create a list of pools based on the given arguments. If we're given no * arguments, then iterate over all pools in the system and add them to the AVL * tree. Otherwise, add only those pool explicitly specified on the command * line. 
*/ zpool_list_t * pool_list_get(int argc, char **argv, zprop_list_t **proplist, boolean_t literal, int *err) { zpool_list_t *zlp; zlp = safe_malloc(sizeof (zpool_list_t)); zlp->zl_pool = uu_avl_pool_create("zfs_pool", sizeof (zpool_node_t), offsetof(zpool_node_t, zn_avlnode), zpool_compare, UU_DEFAULT); if (zlp->zl_pool == NULL) zpool_no_memory(); if ((zlp->zl_avl = uu_avl_create(zlp->zl_pool, NULL, UU_DEFAULT)) == NULL) zpool_no_memory(); zlp->zl_proplist = proplist; zlp->zl_literal = literal; if (argc == 0) { (void) zpool_iter(g_zfs, add_pool, zlp); zlp->zl_findall = B_TRUE; } else { int i; for (i = 0; i < argc; i++) { zpool_handle_t *zhp; if ((zhp = zpool_open_canfail(g_zfs, argv[i])) != NULL) { if (add_pool(zhp, zlp) != 0) *err = B_TRUE; } else { *err = B_TRUE; } } } return (zlp); } /* * Search for any new pools, adding them to the list. We only add pools when no * options were given on the command line. Otherwise, we keep the list fixed as * those that were explicitly specified. */ void pool_list_update(zpool_list_t *zlp) { if (zlp->zl_findall) (void) zpool_iter(g_zfs, add_pool, zlp); } /* * Iterate over all pools in the list, executing the callback for each */ int pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func, void *data) { zpool_node_t *node, *next_node; int ret = 0; for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next_node) { next_node = uu_avl_next(zlp->zl_avl, node); if (zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL || unavail) ret |= func(node->zn_handle, data); } return (ret); } /* * Remove the given pool from the list. When running iostat, we want to remove * those pools that no longer exist. */ void pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp) { zpool_node_t search, *node; search.zn_handle = zhp; if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) { uu_avl_remove(zlp->zl_avl, node); zpool_close(node->zn_handle); free(node); } } /* * Free all the handles associated with this list. 
*/ void pool_list_free(zpool_list_t *zlp) { uu_avl_walk_t *walk; zpool_node_t *node; if ((walk = uu_avl_walk_start(zlp->zl_avl, UU_WALK_ROBUST)) == NULL) { (void) fprintf(stderr, gettext("internal error: out of memory")); exit(1); } while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(zlp->zl_avl, node); zpool_close(node->zn_handle); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(zlp->zl_avl); uu_avl_pool_destroy(zlp->zl_pool); free(zlp); } /* * Returns the number of elements in the pool list. */ int pool_list_count(zpool_list_t *zlp) { return (uu_avl_numnodes(zlp->zl_avl)); } /* * High level function which iterates over all pools given on the command line, * using the pool_list_* interfaces. */ int for_each_pool(int argc, char **argv, boolean_t unavail, zprop_list_t **proplist, boolean_t literal, zpool_iter_f func, void *data) { zpool_list_t *list; int ret = 0; if ((list = pool_list_get(argc, argv, proplist, literal, &ret)) == NULL) return (1); if (pool_list_iter(list, unavail, func, data) != 0) ret = 1; pool_list_free(list); return (ret); } /* * This is the equivalent of for_each_pool() for vdevs. It iterates thorough * all vdevs in the pool, ignoring root vdevs and holes, calling func() on * each one. * * @zhp: Zpool handle * @func: Function to call on each vdev * @data: Custom data to pass to the function */ int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data) { nvlist_t *config, *nvroot = NULL; if ((config = zpool_get_config(zhp, NULL)) != NULL) { verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); } return (for_each_vdev_cb((void *) zhp, nvroot, func, data)); } /* * Process the vcdl->vdev_cmd_data[] array to figure out all the unique column * names and their widths. When this function is done, vcdl->uniq_cols, * vcdl->uniq_cols_cnt, and vcdl->uniq_cols_width will be filled in. 
*/ static void process_unique_cmd_columns(vdev_cmd_data_list_t *vcdl) { char **uniq_cols = NULL, **tmp = NULL; int *uniq_cols_width; vdev_cmd_data_t *data; int cnt = 0; int k; /* For each vdev */ for (int i = 0; i < vcdl->count; i++) { data = &vcdl->data[i]; /* For each column the vdev reported */ for (int j = 0; j < data->cols_cnt; j++) { /* Is this column in our list of unique column names? */ for (k = 0; k < cnt; k++) { if (strcmp(data->cols[j], uniq_cols[k]) == 0) break; /* yes it is */ } if (k == cnt) { /* No entry for column, add to list */ tmp = realloc(uniq_cols, sizeof (*uniq_cols) * (cnt + 1)); if (tmp == NULL) break; /* Nothing we can do... */ uniq_cols = tmp; uniq_cols[cnt] = data->cols[j]; cnt++; } } } /* * We now have a list of all the unique column names. Figure out the * max width of each column by looking at the column name and all its * values. */ uniq_cols_width = safe_malloc(sizeof (*uniq_cols_width) * cnt); for (int i = 0; i < cnt; i++) { /* Start off with the column title's width */ uniq_cols_width[i] = strlen(uniq_cols[i]); /* For each vdev */ for (int j = 0; j < vcdl->count; j++) { /* For each of the vdev's values in a column */ data = &vcdl->data[j]; for (k = 0; k < data->cols_cnt; k++) { /* Does this vdev have a value for this col? */ if (strcmp(data->cols[k], uniq_cols[i]) == 0) { /* Is the value width larger? */ uniq_cols_width[i] = MAX(uniq_cols_width[i], strlen(data->lines[k])); } } } } vcdl->uniq_cols = uniq_cols; vcdl->uniq_cols_cnt = cnt; vcdl->uniq_cols_width = uniq_cols_width; } /* * Process a line of command output * * When running 'zpool iostat|status -c' the lines of output can either be * in the form of: * * column_name=value * * Or just: * * value * * Process the column_name (if any) and value. * * Returns 0 if line was processed, and there are more lines can still be * processed. * * Returns 1 if this was the last line to process, or error. 
*/ static int vdev_process_cmd_output(vdev_cmd_data_t *data, char *line) { char *col = NULL; char *val = line; char *equals; char **tmp; if (line == NULL) return (1); equals = strchr(line, '='); if (equals != NULL) { /* * We have a 'column=value' type line. Split it into the * column and value strings by turning the '=' into a '\0'. */ *equals = '\0'; col = line; val = equals + 1; } else { val = line; } /* Do we already have a column by this name? If so, skip it. */ if (col != NULL) { for (int i = 0; i < data->cols_cnt; i++) { if (strcmp(col, data->cols[i]) == 0) return (0); /* Duplicate, skip */ } } if (val != NULL) { tmp = realloc(data->lines, (data->lines_cnt + 1) * sizeof (*data->lines)); if (tmp == NULL) return (1); data->lines = tmp; data->lines[data->lines_cnt] = strdup(val); data->lines_cnt++; } if (col != NULL) { tmp = realloc(data->cols, (data->cols_cnt + 1) * sizeof (*data->cols)); if (tmp == NULL) return (1); data->cols = tmp; data->cols[data->cols_cnt] = strdup(col); data->cols_cnt++; } if (val != NULL && col == NULL) return (1); return (0); } /* * Run the cmd and store results in *data. */ static void vdev_run_cmd(vdev_cmd_data_t *data, char *cmd) { int rc; char *argv[2] = {cmd}; char **env; char **lines = NULL; int lines_cnt = 0; int i; env = zpool_vdev_script_alloc_env(data->pool, data->path, data->upath, data->vdev_enc_sysfs_path, NULL, NULL); if (env == NULL) goto out; /* Run the command */ rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines, &lines_cnt); zpool_vdev_script_free_env(env); if (rc != 0) goto out; /* Process the output we got */ for (i = 0; i < lines_cnt; i++) if (vdev_process_cmd_output(data, lines[i]) != 0) break; out: if (lines != NULL) libzfs_free_str_array(lines, lines_cnt); } /* * Generate the search path for zpool iostat/status -c scripts. * The string returned must be freed. 
*/ char * zpool_get_cmd_search_path(void) { const char *env; char *sp = NULL; env = getenv("ZPOOL_SCRIPTS_PATH"); if (env != NULL) return (strdup(env)); env = getenv("HOME"); if (env != NULL) { if (asprintf(&sp, "%s/.zpool.d:%s", env, ZPOOL_SCRIPTS_DIR) != -1) { return (sp); } } if (asprintf(&sp, "%s", ZPOOL_SCRIPTS_DIR) != -1) return (sp); return (NULL); } /* Thread function run for each vdev */ static void vdev_run_cmd_thread(void *cb_cmd_data) { vdev_cmd_data_t *data = cb_cmd_data; char *cmd = NULL, *cmddup, *cmdrest; cmddup = strdup(data->cmd); if (cmddup == NULL) return; cmdrest = cmddup; while ((cmd = strtok_r(cmdrest, ",", &cmdrest))) { char *dir = NULL, *sp, *sprest; char fullpath[MAXPATHLEN]; if (strchr(cmd, '/') != NULL) continue; sp = zpool_get_cmd_search_path(); if (sp == NULL) continue; sprest = sp; while ((dir = strtok_r(sprest, ":", &sprest))) { if (snprintf(fullpath, sizeof (fullpath), "%s/%s", dir, cmd) == -1) continue; if (access(fullpath, X_OK) == 0) { vdev_run_cmd(data, fullpath); break; } } free(sp); } free(cmddup); } /* For each vdev in the pool run a command */ static int for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl) { vdev_cmd_data_list_t *vcdl = cb_vcdl; vdev_cmd_data_t *data; char *path = NULL; char *vname = NULL; char *vdev_enc_sysfs_path = NULL; int i, match = 0; zpool_handle_t *zhp = zhp_data; if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) return (1); + /* Make sure we're getting the updated enclosure sysfs path */ + update_vdev_config_dev_sysfs_path(nv, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &vdev_enc_sysfs_path); /* Spares show more than once if they're in use, so skip if exists */ for (i = 0; i < vcdl->count; i++) { if ((strcmp(vcdl->data[i].path, path) == 0) && (strcmp(vcdl->data[i].pool, zpool_get_name(zhp)) == 0)) { /* vdev already exists, skip it */ return (0); } } /* Check for selected vdevs here, if any */ for (i = 0; i < 
vcdl->vdev_names_count; i++) { vname = zpool_vdev_name(g_zfs, zhp, nv, vcdl->cb_name_flags); if (strcmp(vcdl->vdev_names[i], vname) == 0) { free(vname); match = 1; break; /* match */ } free(vname); } /* If we selected vdevs, and this isn't one of them, then bail out */ if (!match && vcdl->vdev_names_count) return (0); /* * Resize our array and add in the new element. */ if (!(vcdl->data = realloc(vcdl->data, sizeof (*vcdl->data) * (vcdl->count + 1)))) return (ENOMEM); /* couldn't realloc */ data = &vcdl->data[vcdl->count]; data->pool = strdup(zpool_get_name(zhp)); data->path = strdup(path); data->upath = zfs_get_underlying_path(path); data->cmd = vcdl->cmd; data->lines = data->cols = NULL; data->lines_cnt = data->cols_cnt = 0; if (vdev_enc_sysfs_path) data->vdev_enc_sysfs_path = strdup(vdev_enc_sysfs_path); else data->vdev_enc_sysfs_path = NULL; vcdl->count++; return (0); } /* Get the names and count of the vdevs */ static int all_pools_for_each_vdev_gather_cb(zpool_handle_t *zhp, void *cb_vcdl) { return (for_each_vdev(zhp, for_each_vdev_run_cb, cb_vcdl)); } /* * Now that vcdl is populated with our complete list of vdevs, spawn * off the commands. */ static void all_pools_for_each_vdev_run_vcdl(vdev_cmd_data_list_t *vcdl) { tpool_t *t; t = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); if (t == NULL) return; /* Spawn off the command for each vdev */ for (int i = 0; i < vcdl->count; i++) { (void) tpool_dispatch(t, vdev_run_cmd_thread, (void *) &vcdl->data[i]); } /* Wait for threads to finish */ tpool_wait(t); tpool_destroy(t); } /* * Run command 'cmd' on all vdevs in all pools in argv. Saves the first line of * output from the command in vcdk->data[].line for all vdevs. If you want * to run the command on only certain vdevs, fill in g_zfs, vdev_names, * vdev_names_count, and cb_name_flags. Otherwise leave them as zero. 
*
 * Returns a vdev_cmd_data_list_t that must be freed with
 * free_vdev_cmd_data_list();
 */
vdev_cmd_data_list_t *
all_pools_for_each_vdev_run(int argc, char **argv, char *cmd,
    libzfs_handle_t *g_zfs, char **vdev_names, int vdev_names_count,
    int cb_name_flags)
{
	vdev_cmd_data_list_t *vcdl;
	/*
	 * NOTE(review): count, data and the uniq_cols fields are never set
	 * here, so this presumably relies on safe_malloc() returning zeroed
	 * memory -- confirm.
	 */
	vcdl = safe_malloc(sizeof (vdev_cmd_data_list_t));
	vcdl->cmd = cmd;

	vcdl->vdev_names = vdev_names;
	vcdl->vdev_names_count = vdev_names_count;
	vcdl->cb_name_flags = cb_name_flags;
	vcdl->g_zfs = g_zfs;

	/* Gather our list of all vdevs in all pools */
	for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE,
	    all_pools_for_each_vdev_gather_cb, vcdl);

	/* Run command on all vdevs in all pools */
	all_pools_for_each_vdev_run_vcdl(vcdl);

	/*
	 * vcdl->data[] now contains all the column names and values for each
	 * vdev.  We need to process that into a master list of unique column
	 * names, and figure out the width of each column.
	 */
	process_unique_cmd_columns(vcdl);

	return (vcdl);
}

/*
 * Free the vdev_cmd_data_list_t created by all_pools_for_each_vdev_run()
 *
 * Releases the unique-column arrays, then every per-vdev entry (its path,
 * pool, upath, each stored line and column string, and the enclosure sysfs
 * path), and finally the entry array and the list itself.  data[i].cmd is
 * not freed; it aliases the caller-supplied vcdl->cmd.
 */
void
free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl)
{
	free(vcdl->uniq_cols);
	free(vcdl->uniq_cols_width);

	for (int i = 0; i < vcdl->count; i++) {
		free(vcdl->data[i].path);
		free(vcdl->data[i].pool);
		free(vcdl->data[i].upath);

		for (int j = 0; j < vcdl->data[i].lines_cnt; j++)
			free(vcdl->data[i].lines[j]);

		free(vcdl->data[i].lines);

		for (int j = 0; j < vcdl->data[i].cols_cnt; j++)
			free(vcdl->data[i].cols[j]);

		free(vcdl->data[i].cols);
		free(vcdl->data[i].vdev_enc_sysfs_path);
	}
	free(vcdl->data);
	free(vcdl);
}
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index a06af9aeceb4..e9c5a4ea4018 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -1,10882 +1,11063 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2012 by Frederik Wessels. All rights reserved. * Copyright (c) 2012 by Cyril Plisko. All rights reserved. * Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved. * Copyright 2016 Igor Kozhukhov . * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2017, Intel Corporation. 
* Copyright (c) 2019, loli10K * Copyright (c) 2021, Colm Buckley * Copyright [2021] Hewlett Packard Enterprise Development LP */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zpool_util.h" #include "zfs_comutil.h" #include "zfeature_common.h" #include "statcommon.h" libzfs_handle_t *g_zfs; static int zpool_do_create(int, char **); static int zpool_do_destroy(int, char **); static int zpool_do_add(int, char **); static int zpool_do_remove(int, char **); static int zpool_do_labelclear(int, char **); static int zpool_do_checkpoint(int, char **); static int zpool_do_list(int, char **); static int zpool_do_iostat(int, char **); static int zpool_do_status(int, char **); static int zpool_do_online(int, char **); static int zpool_do_offline(int, char **); static int zpool_do_clear(int, char **); static int zpool_do_reopen(int, char **); static int zpool_do_reguid(int, char **); static int zpool_do_attach(int, char **); static int zpool_do_detach(int, char **); static int zpool_do_replace(int, char **); static int zpool_do_split(int, char **); static int zpool_do_initialize(int, char **); static int zpool_do_scrub(int, char **); static int zpool_do_resilver(int, char **); static int zpool_do_trim(int, char **); static int zpool_do_import(int, char **); static int zpool_do_export(int, char **); static int zpool_do_upgrade(int, char **); static int zpool_do_history(int, char **); static int zpool_do_events(int, char **); static int zpool_do_get(int, char **); static int zpool_do_set(int, char **); static int zpool_do_sync(int, char **); static int zpool_do_version(int, char **); static int zpool_do_wait(int, char **); static zpool_compat_status_t zpool_do_load_compat( const char *, boolean_t *); /* * These libumem hooks 
provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */
#ifdef DEBUG
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}

const char *
_umem_logging_init(void)
{
	return ("fail,contents"); /* $UMEM_LOGGING setting */
}
#endif

/*
 * Identifies the usage text of a subcommand; get_usage() maps each value
 * to its message.
 */
typedef enum {
	HELP_ADD,
	HELP_ATTACH,
	HELP_CLEAR,
	HELP_CREATE,
	HELP_CHECKPOINT,
	HELP_DESTROY,
	HELP_DETACH,
	HELP_EXPORT,
	HELP_HISTORY,
	HELP_IMPORT,
	HELP_IOSTAT,
	HELP_LABELCLEAR,
	HELP_LIST,
	HELP_OFFLINE,
	HELP_ONLINE,
	HELP_REPLACE,
	HELP_REMOVE,
	HELP_INITIALIZE,
	HELP_SCRUB,
	HELP_RESILVER,
	HELP_TRIM,
	HELP_STATUS,
	HELP_UPGRADE,
	HELP_EVENTS,
	HELP_GET,
	HELP_SET,
	HELP_SPLIT,
	HELP_SYNC,
	HELP_REGUID,
	HELP_REOPEN,
	HELP_VERSION,
	HELP_WAIT
} zpool_help_t;


/*
 * Flags for stats to display with "zpool iostats"
 *
 * The values double as bit positions for the IOS_*_M masks below and as
 * the first index of vsx_type_to_nvlist[].
 */
enum iostat_type {
	IOS_DEFAULT = 0,
	IOS_LATENCY = 1,
	IOS_QUEUES = 2,
	IOS_L_HISTO = 3,
	IOS_RQ_HISTO = 4,
	IOS_COUNT,	/* always last element */
};

/* iostat_type entries as bitmasks */
#define	IOS_DEFAULT_M	(1ULL << IOS_DEFAULT)
#define	IOS_LATENCY_M	(1ULL << IOS_LATENCY)
#define	IOS_QUEUES_M	(1ULL << IOS_QUEUES)
#define	IOS_L_HISTO_M	(1ULL << IOS_L_HISTO)
#define	IOS_RQ_HISTO_M	(1ULL << IOS_RQ_HISTO)

/* Mask of all the histo bits */
#define	IOS_ANYHISTO_M (IOS_L_HISTO_M | IOS_RQ_HISTO_M)

/*
 * Lookup table for iostat flags to nvlist names.  Basically a list
 * of all the nvlists a flag requires.  Also specifies the order in
 * which data gets printed in zpool iostat.
*/ static const char *vsx_type_to_nvlist[IOS_COUNT][13] = { [IOS_L_HISTO] = { ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, NULL}, [IOS_LATENCY] = { ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, NULL}, [IOS_QUEUES] = { ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_TRIM_ACTIVE_QUEUE, NULL}, [IOS_RQ_HISTO] = { ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, NULL}, }; /* * Given a cb->cb_flags with a histogram bit set, return the iostat_type. * Right now, only one histo bit is ever set at one time, so we can * just do a highbit64(a) */ #define IOS_HISTO_IDX(a) (highbit64(a & IOS_ANYHISTO_M) - 1) typedef struct zpool_command { const char *name; int (*func)(int, char **); zpool_help_t usage; } zpool_command_t; /* * Master command table. Each ZFS command has a name, associated function, and * usage message. The usage messages need to be internationalized, so we have * to have a function to return the usage message based on a command index. 
* * These commands are organized according to how they are displayed in the usage * message. An empty command (one with a NULL name) indicates an empty line in * the generic usage message. */ static zpool_command_t command_table[] = { { "version", zpool_do_version, HELP_VERSION }, { NULL }, { "create", zpool_do_create, HELP_CREATE }, { "destroy", zpool_do_destroy, HELP_DESTROY }, { NULL }, { "add", zpool_do_add, HELP_ADD }, { "remove", zpool_do_remove, HELP_REMOVE }, { NULL }, { "labelclear", zpool_do_labelclear, HELP_LABELCLEAR }, { NULL }, { "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT }, { NULL }, { "list", zpool_do_list, HELP_LIST }, { "iostat", zpool_do_iostat, HELP_IOSTAT }, { "status", zpool_do_status, HELP_STATUS }, { NULL }, { "online", zpool_do_online, HELP_ONLINE }, { "offline", zpool_do_offline, HELP_OFFLINE }, { "clear", zpool_do_clear, HELP_CLEAR }, { "reopen", zpool_do_reopen, HELP_REOPEN }, { NULL }, { "attach", zpool_do_attach, HELP_ATTACH }, { "detach", zpool_do_detach, HELP_DETACH }, { "replace", zpool_do_replace, HELP_REPLACE }, { "split", zpool_do_split, HELP_SPLIT }, { NULL }, { "initialize", zpool_do_initialize, HELP_INITIALIZE }, { "resilver", zpool_do_resilver, HELP_RESILVER }, { "scrub", zpool_do_scrub, HELP_SCRUB }, { "trim", zpool_do_trim, HELP_TRIM }, { NULL }, { "import", zpool_do_import, HELP_IMPORT }, { "export", zpool_do_export, HELP_EXPORT }, { "upgrade", zpool_do_upgrade, HELP_UPGRADE }, { "reguid", zpool_do_reguid, HELP_REGUID }, { NULL }, { "history", zpool_do_history, HELP_HISTORY }, { "events", zpool_do_events, HELP_EVENTS }, { NULL }, { "get", zpool_do_get, HELP_GET }, { "set", zpool_do_set, HELP_SET }, { "sync", zpool_do_sync, HELP_SYNC }, { NULL }, { "wait", zpool_do_wait, HELP_WAIT }, }; #define NCOMMAND (ARRAY_SIZE(command_table)) #define VDEV_ALLOC_CLASS_LOGS "logs" static zpool_command_t *current_command; static char history_str[HIS_MAX_RECORD_LEN]; static boolean_t log_history = B_TRUE; static uint_t 
timestamp_fmt = NODATE; static const char * get_usage(zpool_help_t idx) { switch (idx) { case HELP_ADD: return (gettext("\tadd [-fgLnP] [-o property=value] " " ...\n")); case HELP_ATTACH: return (gettext("\tattach [-fsw] [-o property=value] " " \n")); case HELP_CLEAR: - return (gettext("\tclear [-nF] [device]\n")); + return (gettext("\tclear [[--power]|[-nF]] [device]\n")); case HELP_CREATE: return (gettext("\tcreate [-fnd] [-o property=value] ... \n" "\t [-O file-system-property=value] ... \n" "\t [-m mountpoint] [-R root] ...\n")); case HELP_CHECKPOINT: return (gettext("\tcheckpoint [-d [-w]] ...\n")); case HELP_DESTROY: return (gettext("\tdestroy [-f] \n")); case HELP_DETACH: return (gettext("\tdetach \n")); case HELP_EXPORT: return (gettext("\texport [-af] ...\n")); case HELP_HISTORY: return (gettext("\thistory [-il] [] ...\n")); case HELP_IMPORT: return (gettext("\timport [-d dir] [-D]\n" "\timport [-o mntopts] [-o property=value] ... \n" "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]] -a\n" "\timport [-o mntopts] [-o property=value] ... \n" "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]]\n" "\t [--rewind-to-checkpoint] [newpool]\n")); case HELP_IOSTAT: return (gettext("\tiostat [[[-c [script1,script2,...]" "[-lq]]|[-rw]] [-T d | u] [-ghHLpPvy]\n" "\t [[pool ...]|[pool vdev ...]|[vdev ...]]" " [[-n] interval [count]]\n")); case HELP_LABELCLEAR: return (gettext("\tlabelclear [-f] \n")); case HELP_LIST: return (gettext("\tlist [-gHLpPv] [-o property[,...]] " "[-T d|u] [pool] ... 
\n" "\t [interval [count]]\n")); case HELP_OFFLINE: - return (gettext("\toffline [-f] [-t] ...\n")); + return (gettext("\toffline [--power]|[[-f][-t]] " + " ...\n")); case HELP_ONLINE: - return (gettext("\tonline [-e] ...\n")); + return (gettext("\tonline [--power][-e] " + "...\n")); case HELP_REPLACE: return (gettext("\treplace [-fsw] [-o property=value] " " [new-device]\n")); case HELP_REMOVE: return (gettext("\tremove [-npsw] ...\n")); case HELP_REOPEN: return (gettext("\treopen [-n] \n")); case HELP_INITIALIZE: return (gettext("\tinitialize [-c | -s | -u] [-w] " "[ ...]\n")); case HELP_SCRUB: return (gettext("\tscrub [-s | -p] [-w] ...\n")); case HELP_RESILVER: return (gettext("\tresilver ...\n")); case HELP_TRIM: return (gettext("\ttrim [-dw] [-r ] [-c | -s] " "[ ...]\n")); case HELP_STATUS: - return (gettext("\tstatus [-c [script1,script2,...]] " + return (gettext("\tstatus [--power] [-c [script1,script2,...]] " "[-igLpPstvxD] [-T d|u] [pool] ... \n" "\t [interval [count]]\n")); case HELP_UPGRADE: return (gettext("\tupgrade\n" "\tupgrade -v\n" "\tupgrade [-V version] <-a | pool ...>\n")); case HELP_EVENTS: return (gettext("\tevents [-vHf [pool] | -c]\n")); case HELP_GET: return (gettext("\tget [-Hp] [-o \"all\" | field[,...]] " "<\"all\" | property[,...]> ...\n")); case HELP_SET: return (gettext("\tset \n")); case HELP_SPLIT: return (gettext("\tsplit [-gLnPl] [-R altroot] [-o mntopts]\n" "\t [-o property=value] " "[ ...]\n")); case HELP_REGUID: return (gettext("\treguid \n")); case HELP_SYNC: return (gettext("\tsync [pool] ...\n")); case HELP_VERSION: return (gettext("\tversion\n")); case HELP_WAIT: return (gettext("\twait [-Hp] [-T d|u] [-t [,...]] " " [interval]\n")); } abort(); /* NOTREACHED */ } static void zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res) { uint_t children = 0; nvlist_t **child; uint_t i; (void) nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, &children); if (children == 0) { char *path = 
zpool_vdev_name(g_zfs, zhp, nvroot, VDEV_NAME_PATH); if (strcmp(path, VDEV_TYPE_INDIRECT) != 0 && strcmp(path, VDEV_TYPE_HOLE) != 0) fnvlist_add_boolean(res, path); free(path); return; } for (i = 0; i < children; i++) { zpool_collect_leaves(zhp, child[i], res); } } /* * Callback routine that will print out a pool property value. */ static int print_prop_cb(int prop, void *cb) { FILE *fp = cb; (void) fprintf(fp, "\t%-19s ", zpool_prop_to_name(prop)); if (zpool_prop_readonly(prop)) (void) fprintf(fp, " NO "); else (void) fprintf(fp, " YES "); if (zpool_prop_values(prop) == NULL) (void) fprintf(fp, "-\n"); else (void) fprintf(fp, "%s\n", zpool_prop_values(prop)); return (ZPROP_CONT); } +/* + * Given a leaf vdev name like 'L5' return its VDEV_CONFIG_PATH like + * '/dev/disk/by-vdev/L5'. + */ +static const char * +vdev_name_to_path(zpool_handle_t *zhp, char *vdev) +{ + nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL); + if (vdev_nv == NULL) { + return (NULL); + } + return (fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH)); +} + +static int +zpool_power_on(zpool_handle_t *zhp, char *vdev) +{ + return (zpool_power(zhp, vdev, B_TRUE)); +} + +static int +zpool_power_on_and_disk_wait(zpool_handle_t *zhp, char *vdev) +{ + int rc; + + rc = zpool_power_on(zhp, vdev); + if (rc != 0) + return (rc); + + zpool_disk_wait(vdev_name_to_path(zhp, vdev)); + + return (0); +} + +static int +zpool_power_on_pool_and_wait_for_devices(zpool_handle_t *zhp) +{ + nvlist_t *nv; + const char *path = NULL; + int rc; + + /* Power up all the devices first */ + FOR_EACH_REAL_LEAF_VDEV(zhp, nv) { + path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + if (path != NULL) { + rc = zpool_power_on(zhp, (char *)path); + if (rc != 0) { + return (rc); + } + } + } + + /* + * Wait for their devices to show up. Since we powered them on + * at roughly the same time, they should all come online around + * the same time. 
+ */ + FOR_EACH_REAL_LEAF_VDEV(zhp, nv) { + path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + zpool_disk_wait(path); + } + + return (0); +} + +static int +zpool_power_off(zpool_handle_t *zhp, char *vdev) +{ + return (zpool_power(zhp, vdev, B_FALSE)); +} + /* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display * a complete usage message. */ static void usage(boolean_t requested) { FILE *fp = requested ? stdout : stderr; if (current_command == NULL) { int i; (void) fprintf(fp, gettext("usage: zpool command args ...\n")); (void) fprintf(fp, gettext("where 'command' is one of the following:\n\n")); for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) (void) fprintf(fp, "\n"); else (void) fprintf(fp, "%s", get_usage(command_table[i].usage)); } } else { (void) fprintf(fp, gettext("usage:\n")); (void) fprintf(fp, "%s", get_usage(current_command->usage)); } if (current_command != NULL && ((strcmp(current_command->name, "set") == 0) || (strcmp(current_command->name, "get") == 0) || (strcmp(current_command->name, "list") == 0))) { (void) fprintf(fp, gettext("\nthe following properties are supported:\n")); (void) fprintf(fp, "\n\t%-19s %s %s\n\n", "PROPERTY", "EDIT", "VALUES"); /* Iterate over all properties */ (void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE, ZFS_TYPE_POOL); (void) fprintf(fp, "\t%-19s ", "feature@..."); (void) fprintf(fp, "YES disabled | enabled | active\n"); (void) fprintf(fp, gettext("\nThe feature@ properties must be " "appended with a feature name.\nSee zpool-features(7).\n")); } /* * See comments at end of main(). */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } exit(requested ? 0 : 2); } /* * zpool initialize [-c | -s | -u] [-w] [ ...] * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool * if none specified. * * -c Cancel. Ends active initializing. 
*	-s	Suspend. Initializing can then be restarted with no flags.
 *	-u	Uninitialize. Clears initialization state.
 *	-w	Wait. Blocks until initializing has completed.
 *
 * Returns -1 when the pool cannot be opened, otherwise the result of
 * zpool_initialize() / zpool_initialize_wait().
 */
int
zpool_do_initialize(int argc, char **argv)
{
	int c;
	char *poolname;
	zpool_handle_t *zhp;
	nvlist_t *vdevs;
	int err = 0;
	boolean_t wait = B_FALSE;

	struct option long_options[] = {
		{"cancel",	no_argument,		NULL, 'c'},
		{"suspend",	no_argument,		NULL, 's'},
		{"uninit",	no_argument,		NULL, 'u'},
		{"wait",	no_argument,		NULL, 'w'},
		{0, 0, 0, 0}
	};

	pool_initialize_func_t cmd_type = POOL_INITIALIZE_START;
	while ((c = getopt_long(argc, argv, "csuw", long_options,
	    NULL)) != -1) {
		switch (c) {
		case 'c':
			if (cmd_type != POOL_INITIALIZE_START &&
			    cmd_type != POOL_INITIALIZE_CANCEL) {
				(void) fprintf(stderr, gettext("-c cannot be "
				    "combined with other options\n"));
				usage(B_FALSE);
			}
			cmd_type = POOL_INITIALIZE_CANCEL;
			break;
		case 's':
			if (cmd_type != POOL_INITIALIZE_START &&
			    cmd_type != POOL_INITIALIZE_SUSPEND) {
				(void) fprintf(stderr, gettext("-s cannot be "
				    "combined with other options\n"));
				usage(B_FALSE);
			}
			cmd_type = POOL_INITIALIZE_SUSPEND;
			break;
		case 'u':
			if (cmd_type != POOL_INITIALIZE_START &&
			    cmd_type != POOL_INITIALIZE_UNINIT) {
				(void) fprintf(stderr, gettext("-u cannot be "
				    "combined with other options\n"));
				usage(B_FALSE);
			}
			cmd_type = POOL_INITIALIZE_UNINIT;
			break;
		case 'w':
			wait = B_TRUE;
			break;
		case '?':
			/* optopt is 0 for an unrecognized long option. */
			if (optopt != 0) {
				(void) fprintf(stderr,
				    gettext("invalid option '%c'\n"), optopt);
			} else {
				(void) fprintf(stderr,
				    gettext("invalid option '%s'\n"),
				    argv[optind - 1]);
			}
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name argument\n"));
		usage(B_FALSE);
		return (-1);
	}

	if (wait && (cmd_type != POOL_INITIALIZE_START)) {
		/* The space before "or" was missing ("-sor -u"). */
		(void) fprintf(stderr, gettext("-w cannot be used with -c, -s "
		    "or -u\n"));
		usage(B_FALSE);
	}

	poolname = argv[0];
	zhp = zpool_open(g_zfs, poolname);
	if (zhp == NULL)
		return (-1);

	vdevs = fnvlist_alloc();
	if (argc == 1) {
		/* no individual leaf vdevs specified, so add them all */
		nvlist_t *config = zpool_get_config(zhp, NULL);
		nvlist_t *nvroot = fnvlist_lookup_nvlist(config,
		    ZPOOL_CONFIG_VDEV_TREE);
		zpool_collect_leaves(zhp, nvroot, vdevs);
	} else {
		for (int i = 1; i < argc; i++) {
			fnvlist_add_boolean(vdevs, argv[i]);
		}
	}

	if (wait)
		err = zpool_initialize_wait(zhp, cmd_type, vdevs);
	else
		err = zpool_initialize(zhp, cmd_type, vdevs);

	fnvlist_free(vdevs);
	zpool_close(zhp);

	return (err);
}

/*
 * print a pool vdev config for dry runs
 *
 * Prints 'name' (when not NULL) followed by the children of nv whose
 * allocation class equals 'match', each indented two columns further and
 * expanded recursively.  Hole vdevs are skipped.  A child's class is "",
 * or VDEV_ALLOC_BIAS_LOG for a log, or its ZPOOL_CONFIG_ALLOCATION_BIAS
 * value when that is present.  If nv has no children array only 'name'
 * is printed.
 */
static void
print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
    int indent, const char *match, int name_flags)
{
	nvlist_t **child;
	uint_t c, children;
	char *vname;
	boolean_t printed = B_FALSE;

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {
		if (name != NULL)
			(void) printf("\t%*s%s\n", indent, "", name);
		return;
	}

	for (c = 0; c < children; c++) {
		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
		char *class = "";

		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
		    &is_hole);

		if (is_hole == B_TRUE) {
			continue;
		}

		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
		    &is_log);
		if (is_log)
			class = VDEV_ALLOC_BIAS_LOG;
		/* An explicit allocation bias overrides the log default. */
		(void) nvlist_lookup_string(child[c],
		    ZPOOL_CONFIG_ALLOCATION_BIAS, &class);
		if (strcmp(match, class) != 0)
			continue;

		/* Print the heading once, and only if something matched. */
		if (!printed && name != NULL) {
			(void) printf("\t%*s%s\n", indent, "", name);
			printed = B_TRUE;
		}
		vname = zpool_vdev_name(g_zfs, zhp, child[c], name_flags);
		print_vdev_tree(zhp, vname, child[c], indent + 2, "",
		    name_flags);
		free(vname);
	}
}

/*
 * Print the list of l2cache devices for dry runs.
*/
static void
print_cache_list(nvlist_t *nv, int indent)
{
	nvlist_t **devs;
	uint_t ndevs;
	uint_t idx = 0;

	/* Nothing to print unless a non-empty l2cache array is present. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &devs, &ndevs) != 0 || ndevs == 0)
		return;

	(void) printf("\t%*s%s\n", indent, "", "cache");

	while (idx < ndevs) {
		char *label = zpool_vdev_name(g_zfs, NULL, devs[idx], 0);

		(void) printf("\t%*s%s\n", indent + 2, "", label);
		free(label);
		idx++;
	}
}

/*
 * Print the list of spares for dry runs.
 */
static void
print_spare_list(nvlist_t *nv, int indent)
{
	nvlist_t **devs;
	uint_t ndevs;
	uint_t idx = 0;

	/* Nothing to print unless a non-empty spares array is present. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &devs, &ndevs) != 0 || ndevs == 0)
		return;

	(void) printf("\t%*s%s\n", indent, "", "spares");

	while (idx < ndevs) {
		char *label = zpool_vdev_name(g_zfs, NULL, devs[idx], 0);

		(void) printf("\t%*s%s\n", indent + 2, "", label);
		free(label);
		idx++;
	}
}

/*
 * Report whether any pair in proplist is named like a feature property,
 * as judged by zpool_prop_feature().
 */
static boolean_t
prop_list_contains_feature(nvlist_t *proplist)
{
	nvpair_t *pair = nvlist_next_nvpair(proplist, NULL);

	while (pair != NULL) {
		if (zpool_prop_feature(nvpair_name(pair)))
			return (B_TRUE);
		pair = nvlist_next_nvpair(proplist, pair);
	}

	return (B_FALSE);
}

/*
 * Add a property pair (name, string-value) into a property nvlist.
*/
/*
 * *props is allocated on first use.  'poolprop' selects pool-property
 * validation (B_TRUE) or filesystem-property validation (B_FALSE).
 *
 * Returns 0 on success, 1 when out of memory, and 2 when the property is
 * invalid, conflicts with one already in the list, or is given twice
 * (repeating the cachefile property is allowed).
 */
static int
add_prop_list(const char *propname, char *propval, nvlist_t **props,
    boolean_t poolprop)
{
	zpool_prop_t prop = ZPOOL_PROP_INVAL;
	nvlist_t *proplist;
	const char *normnm;
	char *strval;

	if (*props == NULL &&
	    nvlist_alloc(props, NV_UNIQUE_NAME, 0) != 0) {
		(void) fprintf(stderr,
		    gettext("internal error: out of memory\n"));
		return (1);
	}

	proplist = *props;

	if (poolprop) {
		const char *vname = zpool_prop_to_name(ZPOOL_PROP_VERSION);
		const char *cname =
		    zpool_prop_to_name(ZPOOL_PROP_COMPATIBILITY);

		if ((prop = zpool_name_to_prop(propname)) == ZPOOL_PROP_INVAL &&
		    !zpool_prop_feature(propname)) {
			(void) fprintf(stderr, gettext("property '%s' is "
			    "not a valid pool property\n"), propname);
			return (2);
		}

		/*
		 * feature@ properties and version should not be specified
		 * at the same time.
		 */
		if ((prop == ZPOOL_PROP_INVAL &&
		    zpool_prop_feature(propname) &&
		    nvlist_exists(proplist, vname)) ||
		    (prop == ZPOOL_PROP_VERSION &&
		    prop_list_contains_feature(proplist))) {
			(void) fprintf(stderr, gettext("'feature@' and "
			    "'version' properties cannot be specified "
			    "together\n"));
			return (2);
		}

		/*
		 * if version is specified, only "legacy" compatibility
		 * may be requested
		 */
		if ((prop == ZPOOL_PROP_COMPATIBILITY &&
		    strcmp(propval, ZPOOL_COMPAT_LEGACY) != 0 &&
		    nvlist_exists(proplist, vname)) ||
		    (prop == ZPOOL_PROP_VERSION &&
		    nvlist_exists(proplist, cname) &&
		    strcmp(fnvlist_lookup_string(proplist, cname),
		    ZPOOL_COMPAT_LEGACY) != 0)) {
			(void) fprintf(stderr, gettext("when 'version' is "
			    "specified, the 'compatibility' feature may only "
			    "be set to '" ZPOOL_COMPAT_LEGACY "'\n"));
			return (2);
		}

		/* Feature names are stored as given; others are normalized. */
		if (zpool_prop_feature(propname))
			normnm = propname;
		else
			normnm = zpool_prop_to_name(prop);
	} else {
		zfs_prop_t fsprop = zfs_name_to_prop(propname);

		if (zfs_prop_valid_for_type(fsprop, ZFS_TYPE_FILESYSTEM,
		    B_FALSE)) {
			normnm = zfs_prop_to_name(fsprop);
		} else if (zfs_prop_user(propname) ||
		    zfs_prop_userquota(propname)) {
			normnm = propname;
		} else {
			(void) fprintf(stderr, gettext("property '%s' is "
			    "not a valid filesystem property\n"), propname);
			return (2);
		}
	}

	if (nvlist_lookup_string(proplist, normnm, &strval) == 0 &&
	    prop != ZPOOL_PROP_CACHEFILE) {
		(void) fprintf(stderr, gettext("property '%s' "
		    "specified multiple times\n"), propname);
		return (2);
	}

	if (nvlist_add_string(proplist, normnm, propval) != 0) {
		(void) fprintf(stderr, gettext("internal "
		    "error: out of memory\n"));
		return (1);
	}

	return (0);
}

/*
 * Set a default property pair (name, string-value) in a property nvlist
 *
 * Does nothing and returns 0 when 'propname' is already present in
 * *props; otherwise returns the result of add_prop_list().  'poolprop'
 * is passed through, so a filesystem default is validated as a filesystem
 * property rather than always as a pool property.
 */
static int
add_prop_list_default(const char *propname, char *propval, nvlist_t **props,
    boolean_t poolprop)
{
	char *pval;

	if (nvlist_lookup_string(*props, propname, &pval) == 0)
		return (0);

	return (add_prop_list(propname, propval, props, poolprop));
}

/*
 * zpool add [-fgLnP] [-o property=value] ...
 *
 *	-f	Force addition of devices, even if they appear in use
 *	-g	Display guid for individual vdev name.
 *	-L	Follow links when resolving vdev path name.
 *	-n	Do not add the devices, but display the resulting layout if
 *		they were to be added.
 *	-o	Set property=value.
 *	-P	Display full path for vdev name.
 *
 * Adds the given vdevs to 'pool'.  As with create, the bulk of this work is
 * handled by make_root_vdev(), which constructs the nvlist needed to pass to
 * libzfs.
*/
int
zpool_do_add(int argc, char **argv)
{
	boolean_t force = B_FALSE;
	boolean_t dryrun = B_FALSE;
	int name_flags = 0;
	int c;
	nvlist_t *nvroot;
	char *poolname;
	int ret;
	zpool_handle_t *zhp;
	nvlist_t *config;
	nvlist_t *props = NULL;
	char *propval;

	/* check options */
	while ((c = getopt(argc, argv, "fgLno:P")) != -1) {
		switch (c) {
		case 'f':
			force = B_TRUE;
			break;
		case 'g':
			name_flags |= VDEV_NAME_GUID;
			break;
		case 'L':
			name_flags |= VDEV_NAME_FOLLOW_LINKS;
			break;
		case 'n':
			dryrun = B_TRUE;
			break;
		case 'o':
			if ((propval = strchr(optarg, '=')) == NULL) {
				(void) fprintf(stderr, gettext("missing "
				    "'=' for -o option\n"));
				usage(B_FALSE);
			}
			*propval = '\0';
			propval++;

			/* Only the ashift property is accepted here. */
			if ((strcmp(optarg, ZPOOL_CONFIG_ASHIFT) != 0) ||
			    (add_prop_list(optarg, propval, &props, B_TRUE)))
				usage(B_FALSE);
			break;
		case 'P':
			name_flags |= VDEV_NAME_PATH;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	/* get pool name and check number of arguments */
	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name argument\n"));
		usage(B_FALSE);
	}
	if (argc < 2) {
		(void) fprintf(stderr, gettext("missing vdev specification\n"));
		usage(B_FALSE);
	}

	poolname = argv[0];

	argc--;
	argv++;

	/* From here on, every early return releases the -o property list. */
	if ((zhp = zpool_open(g_zfs, poolname)) == NULL) {
		nvlist_free(props);
		return (1);
	}

	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
		(void) fprintf(stderr, gettext("pool '%s' is unavailable\n"),
		    poolname);
		nvlist_free(props);
		zpool_close(zhp);
		return (1);
	}

	/* unless manually specified use "ashift" pool property (if set) */
	if (!nvlist_exists(props, ZPOOL_CONFIG_ASHIFT)) {
		int intval;
		zprop_source_t src;
		char strval[ZPOOL_MAXPROPLEN];

		intval = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &src);
		if (src != ZPROP_SRC_DEFAULT) {
			(void) snprintf(strval, sizeof (strval), "%" PRId32,
			    intval);
			verify(add_prop_list(ZPOOL_CONFIG_ASHIFT, strval,
			    &props, B_TRUE) == 0);
		}
	}

	/* pass off to make_root_vdev for processing */
	nvroot = make_root_vdev(zhp, props, force, !force, B_FALSE, dryrun,
	    argc, argv);
	if (nvroot == NULL) {
		nvlist_free(props);
		zpool_close(zhp);
		return (1);
	}

	if (dryrun) {
		nvlist_t *poolnvroot;
		nvlist_t **l2child, **sparechild;
		uint_t l2children, sparechildren, c;
		char *vname;
		boolean_t hadcache = B_FALSE, hadspare = B_FALSE;

		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &poolnvroot) == 0);

		(void) printf(gettext("would update '%s' to the following "
		    "configuration:\n\n"), zpool_get_name(zhp));

		/* print original main pool and new tree */
		print_vdev_tree(zhp, poolname, poolnvroot, 0, "",
		    name_flags | VDEV_NAME_TYPE_ID);
		print_vdev_tree(zhp, NULL, nvroot, 0, "", name_flags);

		/* print other classes: 'dedup', 'special', and 'log' */
		if (zfs_special_devs(poolnvroot, VDEV_ALLOC_BIAS_DEDUP)) {
			print_vdev_tree(zhp, "dedup", poolnvroot, 0,
			    VDEV_ALLOC_BIAS_DEDUP, name_flags);
			print_vdev_tree(zhp, NULL, nvroot, 0,
			    VDEV_ALLOC_BIAS_DEDUP, name_flags);
		} else if (zfs_special_devs(nvroot, VDEV_ALLOC_BIAS_DEDUP)) {
			print_vdev_tree(zhp, "dedup", nvroot, 0,
			    VDEV_ALLOC_BIAS_DEDUP, name_flags);
		}

		if (zfs_special_devs(poolnvroot, VDEV_ALLOC_BIAS_SPECIAL)) {
			print_vdev_tree(zhp, "special", poolnvroot, 0,
			    VDEV_ALLOC_BIAS_SPECIAL, name_flags);
			print_vdev_tree(zhp, NULL, nvroot, 0,
			    VDEV_ALLOC_BIAS_SPECIAL, name_flags);
		} else if (zfs_special_devs(nvroot, VDEV_ALLOC_BIAS_SPECIAL)) {
			print_vdev_tree(zhp, "special", nvroot, 0,
			    VDEV_ALLOC_BIAS_SPECIAL, name_flags);
		}

		if (num_logs(poolnvroot) > 0) {
			print_vdev_tree(zhp, "logs", poolnvroot, 0,
			    VDEV_ALLOC_BIAS_LOG, name_flags);
			print_vdev_tree(zhp, NULL, nvroot, 0,
			    VDEV_ALLOC_BIAS_LOG, name_flags);
		} else if (num_logs(nvroot) > 0) {
			print_vdev_tree(zhp, "logs", nvroot, 0,
			    VDEV_ALLOC_BIAS_LOG, name_flags);
		}

		/* Do the same for the caches */
		if (nvlist_lookup_nvlist_array(poolnvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2child, &l2children) == 0 && l2children) {
			hadcache = B_TRUE;
			(void) printf(gettext("\tcache\n"));
			for (c = 0; c < l2children; c++) {
				vname = zpool_vdev_name(g_zfs, NULL,
				    l2child[c], name_flags);
				(void) printf("\t %s\n", vname);
				free(vname);
			}
		}
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2child, &l2children) == 0 && l2children) {
			/* Heading is printed once for old plus new devices. */
			if (!hadcache)
				(void) printf(gettext("\tcache\n"));
			for (c = 0; c < l2children; c++) {
				vname = zpool_vdev_name(g_zfs, NULL,
				    l2child[c], name_flags);
				(void) printf("\t %s\n", vname);
				free(vname);
			}
		}
		/* And finally the spares */
		if (nvlist_lookup_nvlist_array(poolnvroot, ZPOOL_CONFIG_SPARES,
		    &sparechild, &sparechildren) == 0 && sparechildren > 0) {
			hadspare = B_TRUE;
			(void) printf(gettext("\tspares\n"));
			for (c = 0; c < sparechildren; c++) {
				vname = zpool_vdev_name(g_zfs, NULL,
				    sparechild[c], name_flags);
				(void) printf("\t %s\n", vname);
				free(vname);
			}
		}
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &sparechild, &sparechildren) == 0 && sparechildren > 0) {
			if (!hadspare)
				(void) printf(gettext("\tspares\n"));
			for (c = 0; c < sparechildren; c++) {
				vname = zpool_vdev_name(g_zfs, NULL,
				    sparechild[c], name_flags);
				(void) printf("\t %s\n", vname);
				free(vname);
			}
		}

		ret = 0;
	} else {
		ret = (zpool_add(zhp, nvroot) != 0);
	}

	nvlist_free(props);
	nvlist_free(nvroot);
	zpool_close(zhp);

	return (ret);
}

/*
 * zpool remove [-npsw] ...
 *
 * Removes the given vdev from the pool.
*/
/*
 * Option summary for zpool_do_remove(), as implemented below:
 *
 *	-n	Do not remove anything; for each named vdev print the size
 *		reported by zpool_vdev_indirect_size().
 *	-p	With -n, print "<vdev> <bytes>" instead of a sentence with a
 *		human-readable size.
 *	-s	Cancel the removal via zpool_vdev_remove_cancel(); takes no
 *		vdev arguments and cannot be combined with -w.
 *	-w	After every removal was started successfully, call
 *		zpool_wait(zhp, ZPOOL_WAIT_REMOVE) and return its result.
 *
 * Returns 0 on success and 1 if the pool could not be opened or any
 * per-vdev operation failed.
 */
int
zpool_do_remove(int argc, char **argv)
{
	char *poolname;
	int i, ret = 0;
	zpool_handle_t *zhp = NULL;
	boolean_t stop = B_FALSE;
	int c;
	boolean_t noop = B_FALSE;
	boolean_t parsable = B_FALSE;
	boolean_t wait = B_FALSE;

	/* check options */
	while ((c = getopt(argc, argv, "npsw")) != -1) {
		switch (c) {
		case 'n':
			noop = B_TRUE;
			break;
		case 'p':
			parsable = B_TRUE;
			break;
		case 's':
			stop = B_TRUE;
			break;
		case 'w':
			wait = B_TRUE;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	/* get pool name and check number of arguments */
	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name argument\n"));
		usage(B_FALSE);
	}

	poolname = argv[0];

	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
		return (1);

	if (stop && noop) {
		(void) fprintf(stderr, gettext("stop request ignored\n"));
		/* The handle is not used past this point; do not leak it. */
		zpool_close(zhp);
		return (0);
	}

	if (stop) {
		if (argc > 1) {
			(void) fprintf(stderr, gettext("too many arguments\n"));
			usage(B_FALSE);
		}
		/*
		 * Reject the invalid -s/-w combination before touching the
		 * pool, so a malformed command line has no side effects.
		 */
		if (wait) {
			(void) fprintf(stderr, gettext("invalid option "
			    "combination: -w cannot be used with -s\n"));
			usage(B_FALSE);
		}
		if (zpool_vdev_remove_cancel(zhp) != 0)
			ret = 1;
	} else {
		if (argc < 2) {
			(void) fprintf(stderr, gettext("missing device\n"));
			usage(B_FALSE);
		}

		for (i = 1; i < argc; i++) {
			if (noop) {
				uint64_t size;

				/* A failed size query ends the dry run. */
				if (zpool_vdev_indirect_size(zhp, argv[i],
				    &size) != 0) {
					ret = 1;
					break;
				}
				if (parsable) {
					(void) printf("%s %llu\n",
					    argv[i], (unsigned long long)size);
				} else {
					char valstr[32];

					zfs_nicenum(size, valstr,
					    sizeof (valstr));
					(void) printf("Memory that will be "
					    "used after removing %s: %s\n",
					    argv[i], valstr);
				}
			} else {
				/* Keep going so every vdev is attempted. */
				if (zpool_vdev_remove(zhp, argv[i]) != 0)
					ret = 1;
			}
		}

		if (ret == 0 && wait)
			ret = zpool_wait(zhp, ZPOOL_WAIT_REMOVE);
	}
	zpool_close(zhp);

	return (ret);
}

/*
 * Return 1 if a vdev is active (being used in a pool)
 * Return 0 if a vdev is inactive (offlined or faulted, or not in active pool)
 *
 * This is useful for checking if a disk in an active pool is
offlined or * faulted. */ static int vdev_is_active(char *vdev_path) { int fd; fd = open(vdev_path, O_EXCL); if (fd < 0) { return (1); /* cant open O_EXCL - disk is active */ } close(fd); return (0); /* disk is inactive in the pool */ } /* * zpool labelclear [-f] * * -f Force clearing the label for the vdevs which are members of * the exported or foreign pools. * * Verifies that the vdev is not active and zeros out the label information * on the device. */ int zpool_do_labelclear(int argc, char **argv) { char vdev[MAXPATHLEN]; char *name = NULL; struct stat st; int c, fd = -1, ret = 0; nvlist_t *config; pool_state_t state; boolean_t inuse = B_FALSE; boolean_t force = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': force = B_TRUE; break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get vdev name */ if (argc < 1) { (void) fprintf(stderr, gettext("missing vdev name\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* * Check if we were given absolute path and use it as is. * Otherwise if the provided vdev name doesn't point to a file, * try prepending expected disk paths and partition numbers. */ (void) strlcpy(vdev, argv[0], sizeof (vdev)); if (vdev[0] != '/' && stat(vdev, &st) != 0) { int error; error = zfs_resolve_shortname(argv[0], vdev, MAXPATHLEN); if (error == 0 && zfs_dev_is_whole_disk(vdev)) { if (zfs_append_partition(vdev, MAXPATHLEN) == -1) error = ENOENT; } if (error || (stat(vdev, &st) != 0)) { (void) fprintf(stderr, gettext( "failed to find device %s, try specifying absolute " "path instead\n"), argv[0]); return (1); } } if ((fd = open(vdev, O_RDWR)) < 0) { (void) fprintf(stderr, gettext("failed to open %s: %s\n"), vdev, strerror(errno)); return (1); } /* * Flush all dirty pages for the block device. 
This should not be * fatal when the device does not support BLKFLSBUF as would be the * case for a file vdev. */ if ((zfs_dev_flush(fd) != 0) && (errno != ENOTTY)) (void) fprintf(stderr, gettext("failed to invalidate " "cache for %s: %s\n"), vdev, strerror(errno)); if (zpool_read_label(fd, &config, NULL) != 0) { (void) fprintf(stderr, gettext("failed to read label from %s\n"), vdev); ret = 1; goto errout; } nvlist_free(config); ret = zpool_in_use(g_zfs, fd, &state, &name, &inuse); if (ret != 0) { (void) fprintf(stderr, gettext("failed to check state for %s\n"), vdev); ret = 1; goto errout; } if (!inuse) goto wipe_label; switch (state) { default: case POOL_STATE_ACTIVE: case POOL_STATE_SPARE: case POOL_STATE_L2CACHE: /* * We allow the user to call 'zpool offline -f' * on an offlined disk in an active pool. We can check if * the disk is online by calling vdev_is_active(). */ if (force && !vdev_is_active(vdev)) break; (void) fprintf(stderr, gettext( "%s is a member (%s) of pool \"%s\""), vdev, zpool_pool_state_to_name(state), name); if (force) { (void) fprintf(stderr, gettext( ". Offline the disk first to clear its label.")); } printf("\n"); ret = 1; goto errout; case POOL_STATE_EXPORTED: if (force) break; (void) fprintf(stderr, gettext( "use '-f' to override the following error:\n" "%s is a member of exported pool \"%s\"\n"), vdev, name); ret = 1; goto errout; case POOL_STATE_POTENTIALLY_ACTIVE: if (force) break; (void) fprintf(stderr, gettext( "use '-f' to override the following error:\n" "%s is a member of potentially active pool \"%s\"\n"), vdev, name); ret = 1; goto errout; case POOL_STATE_DESTROYED: /* inuse should never be set for a destroyed pool */ assert(0); break; } wipe_label: ret = zpool_clear_label(fd); if (ret != 0) { (void) fprintf(stderr, gettext("failed to clear label for %s\n"), vdev); } errout: free(name); (void) close(fd); return (ret); } /* * zpool create [-fnd] [-o property=value] ... * [-O file-system-property=value] ... 
* [-R root] [-m mountpoint] ... * * -f Force creation, even if devices appear in use * -n Do not create the pool, but display the resulting layout if it * were to be created. * -R Create a pool under an alternate root * -m Set default mountpoint for the root dataset. By default it's * '/' * -o Set property=value. * -o Set feature@feature=enabled|disabled. * -d Don't automatically enable all supported pool features * (individual features can be enabled with -o). * -O Set fsproperty=value in the pool's root file system * * Creates the named pool according to the given vdev specification. The * bulk of the vdev processing is done in make_root_vdev() in zpool_vdev.c. * Once we get the nvlist back from make_root_vdev(), we either print out the * contents (if '-n' was specified), or pass it to libzfs to do the creation. */ int zpool_do_create(int argc, char **argv) { boolean_t force = B_FALSE; boolean_t dryrun = B_FALSE; boolean_t enable_pool_features = B_TRUE; int c; nvlist_t *nvroot = NULL; char *poolname; char *tname = NULL; int ret = 1; char *altroot = NULL; char *compat = NULL; char *mountpoint = NULL; nvlist_t *fsprops = NULL; nvlist_t *props = NULL; char *propval; /* check options */ while ((c = getopt(argc, argv, ":fndR:m:o:O:t:")) != -1) { switch (c) { case 'f': force = B_TRUE; break; case 'n': dryrun = B_TRUE; break; case 'd': enable_pool_features = B_FALSE; break; case 'R': altroot = optarg; if (add_prop_list(zpool_prop_to_name( ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE)) goto errout; if (add_prop_list_default(zpool_prop_to_name( ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) goto errout; break; case 'm': /* Equivalent to -O mountpoint=optarg */ mountpoint = optarg; break; case 'o': if ((propval = strchr(optarg, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for -o option\n")); goto errout; } *propval = '\0'; propval++; if (add_prop_list(optarg, propval, &props, B_TRUE)) goto errout; /* * If the user is creating a pool that doesn't support 
* feature flags, don't enable any features. */ if (zpool_name_to_prop(optarg) == ZPOOL_PROP_VERSION) { char *end; u_longlong_t ver; ver = strtoull(propval, &end, 10); if (*end == '\0' && ver < SPA_VERSION_FEATURES) { enable_pool_features = B_FALSE; } } if (zpool_name_to_prop(optarg) == ZPOOL_PROP_ALTROOT) altroot = propval; if (zpool_name_to_prop(optarg) == ZPOOL_PROP_COMPATIBILITY) compat = propval; break; case 'O': if ((propval = strchr(optarg, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for -O option\n")); goto errout; } *propval = '\0'; propval++; /* * Mountpoints are checked and then added later. * Uniquely among properties, they can be specified * more than once, to avoid conflict with -m. */ if (0 == strcmp(optarg, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT))) { mountpoint = propval; } else if (add_prop_list(optarg, propval, &fsprops, B_FALSE)) { goto errout; } break; case 't': /* * Sanity check temporary pool name. */ if (strchr(optarg, '/') != NULL) { (void) fprintf(stderr, gettext("cannot create " "'%s': invalid character '/' in temporary " "name\n"), optarg); (void) fprintf(stderr, gettext("use 'zfs " "create' to create a dataset\n")); goto errout; } if (add_prop_list(zpool_prop_to_name( ZPOOL_PROP_TNAME), optarg, &props, B_TRUE)) goto errout; if (add_prop_list_default(zpool_prop_to_name( ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) goto errout; tname = optarg; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); goto badusage; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto badusage; } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); goto badusage; } if (argc < 2) { (void) fprintf(stderr, gettext("missing vdev specification\n")); goto badusage; } poolname = argv[0]; /* * As a special case, check for use of '/' in the name, and direct the * user 
to use 'zfs create' instead. */ if (strchr(poolname, '/') != NULL) { (void) fprintf(stderr, gettext("cannot create '%s': invalid " "character '/' in pool name\n"), poolname); (void) fprintf(stderr, gettext("use 'zfs create' to " "create a dataset\n")); goto errout; } /* pass off to make_root_vdev for bulk processing */ nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun, argc - 1, argv + 1); if (nvroot == NULL) goto errout; /* make_root_vdev() allows 0 toplevel children if there are spares */ if (!zfs_allocatable_devs(nvroot)) { (void) fprintf(stderr, gettext("invalid vdev " "specification: at least one toplevel vdev must be " "specified\n")); goto errout; } if (altroot != NULL && altroot[0] != '/') { (void) fprintf(stderr, gettext("invalid alternate root '%s': " "must be an absolute path\n"), altroot); goto errout; } /* * Check the validity of the mountpoint and direct the user to use the * '-m' mountpoint option if it looks like its in use. */ if (mountpoint == NULL || (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) { char buf[MAXPATHLEN]; DIR *dirp; if (mountpoint && mountpoint[0] != '/') { (void) fprintf(stderr, gettext("invalid mountpoint " "'%s': must be an absolute path, 'legacy', or " "'none'\n"), mountpoint); goto errout; } if (mountpoint == NULL) { if (altroot != NULL) (void) snprintf(buf, sizeof (buf), "%s/%s", altroot, poolname); else (void) snprintf(buf, sizeof (buf), "/%s", poolname); } else { if (altroot != NULL) (void) snprintf(buf, sizeof (buf), "%s%s", altroot, mountpoint); else (void) snprintf(buf, sizeof (buf), "%s", mountpoint); } if ((dirp = opendir(buf)) == NULL && errno != ENOENT) { (void) fprintf(stderr, gettext("mountpoint '%s' : " "%s\n"), buf, strerror(errno)); (void) fprintf(stderr, gettext("use '-m' " "option to provide a different default\n")); goto errout; } else if (dirp) { int count = 0; while (count < 3 && readdir(dirp) != NULL) count++; (void) closedir(dirp); if 
(count > 2) { (void) fprintf(stderr, gettext("mountpoint " "'%s' exists and is not empty\n"), buf); (void) fprintf(stderr, gettext("use '-m' " "option to provide a " "different default\n")); goto errout; } } } /* * Now that the mountpoint's validity has been checked, ensure that * the property is set appropriately prior to creating the pool. */ if (mountpoint != NULL) { ret = add_prop_list(zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), mountpoint, &fsprops, B_FALSE); if (ret != 0) goto errout; } ret = 1; if (dryrun) { /* * For a dry run invocation, print out a basic message and run * through all the vdevs in the list and print out in an * appropriate hierarchy. */ (void) printf(gettext("would create '%s' with the " "following layout:\n\n"), poolname); print_vdev_tree(NULL, poolname, nvroot, 0, "", 0); print_vdev_tree(NULL, "dedup", nvroot, 0, VDEV_ALLOC_BIAS_DEDUP, 0); print_vdev_tree(NULL, "special", nvroot, 0, VDEV_ALLOC_BIAS_SPECIAL, 0); print_vdev_tree(NULL, "logs", nvroot, 0, VDEV_ALLOC_BIAS_LOG, 0); print_cache_list(nvroot, 0); print_spare_list(nvroot, 0); ret = 0; } else { /* * Load in feature set. * Note: if compatibility property not given, we'll have * NULL, which means 'all features'. */ boolean_t requested_features[SPA_FEATURES]; if (zpool_do_load_compat(compat, requested_features) != ZPOOL_COMPATIBILITY_OK) goto errout; /* * props contains list of features to enable. 
* For each feature: * - remove it if feature@name=disabled * - leave it there if feature@name=enabled * - add it if: * - enable_pool_features (ie: no '-d' or '-o version') * - it's supported by the kernel module * - it's in the requested feature set * - warn if it's enabled but not in compat */ for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { char propname[MAXPATHLEN]; char *propval; zfeature_info_t *feat = &spa_feature_table[i]; (void) snprintf(propname, sizeof (propname), "feature@%s", feat->fi_uname); if (!nvlist_lookup_string(props, propname, &propval)) { if (strcmp(propval, ZFS_FEATURE_DISABLED) == 0) (void) nvlist_remove_all(props, propname); if (strcmp(propval, ZFS_FEATURE_ENABLED) == 0 && !requested_features[i]) (void) fprintf(stderr, gettext( "Warning: feature \"%s\" enabled " "but is not in specified " "'compatibility' feature set.\n"), feat->fi_uname); } else if ( enable_pool_features && feat->fi_zfs_mod_supported && requested_features[i]) { ret = add_prop_list(propname, ZFS_FEATURE_ENABLED, &props, B_TRUE); if (ret != 0) goto errout; } } ret = 1; if (zpool_create(g_zfs, poolname, nvroot, props, fsprops) == 0) { zfs_handle_t *pool = zfs_open(g_zfs, tname ? tname : poolname, ZFS_TYPE_FILESYSTEM); if (pool != NULL) { if (zfs_mount(pool, NULL, 0) == 0) { ret = zfs_shareall(pool); zfs_commit_all_shares(); } zfs_close(pool); } } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) { (void) fprintf(stderr, gettext("pool name may have " "been omitted\n")); } } errout: nvlist_free(nvroot); nvlist_free(fsprops); nvlist_free(props); return (ret); badusage: nvlist_free(fsprops); nvlist_free(props); usage(B_FALSE); return (2); } /* * zpool destroy * * -f Forcefully unmount any datasets * * Destroy the given pool. Automatically unmounts any datasets in the pool. 
*/ int zpool_do_destroy(int argc, char **argv) { boolean_t force = B_FALSE; int c; char *pool; zpool_handle_t *zhp; int ret; /* check options */ while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } pool = argv[0]; if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { /* * As a special case, check for use of '/' in the name, and * direct the user to use 'zfs destroy' instead. */ if (strchr(pool, '/') != NULL) (void) fprintf(stderr, gettext("use 'zfs destroy' to " "destroy a dataset\n")); return (1); } if (zpool_disable_datasets(zhp, force) != 0) { (void) fprintf(stderr, gettext("could not destroy '%s': " "could not unmount datasets\n"), zpool_get_name(zhp)); zpool_close(zhp); return (1); } /* The history must be logged as part of the export */ log_history = B_FALSE; ret = (zpool_destroy(zhp, history_str) != 0); zpool_close(zhp); return (ret); } typedef struct export_cbdata { boolean_t force; boolean_t hardforce; } export_cbdata_t; /* * Export one pool */ static int zpool_export_one(zpool_handle_t *zhp, void *data) { export_cbdata_t *cb = data; if (zpool_disable_datasets(zhp, cb->force) != 0) return (1); /* The history must be logged as part of the export */ log_history = B_FALSE; if (cb->hardforce) { if (zpool_export_force(zhp, history_str) != 0) return (1); } else if (zpool_export(zhp, cb->force, history_str) != 0) { return (1); } return (0); } /* * zpool export [-f] ... * * -a Export all pools * -f Forcefully unmount datasets * * Export the given pools. By default, the command will attempt to cleanly * unmount any active datasets within the pool. 
If the '-f' flag is specified,
 * then the datasets will be forcefully unmounted.
 */
int
zpool_do_export(int argc, char **argv)
{
	export_cbdata_t opts = { .force = B_FALSE, .hardforce = B_FALSE };
	boolean_t every_pool = B_FALSE;
	int opt;

	/* Record the flags straight into the callback data. */
	while ((opt = getopt(argc, argv, "afF")) != -1) {
		switch (opt) {
		case 'a':
			every_pool = B_TRUE;
			break;
		case 'f':
			opts.force = B_TRUE;
			break;
		case 'F':
			opts.hardforce = B_TRUE;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	/* -a takes no pool names; otherwise at least one is required. */
	if (every_pool) {
		if (argc != 0) {
			(void) fprintf(stderr, gettext("too many arguments\n"));
			usage(B_FALSE);
		}
	} else if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool argument\n"));
		usage(B_FALSE);
	}

	return (for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE,
	    zpool_export_one, &opts));
}

/*
 * Given a vdev configuration, determine the maximum width needed for the device
 * name column.
*/ static int max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max, int name_flags) { char *name; nvlist_t **child; uint_t c, children; int ret; name = zpool_vdev_name(g_zfs, zhp, nv, name_flags); if (strlen(name) + depth > max) max = strlen(name) + depth; free(name); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { for (c = 0; c < children; c++) if ((ret = max_width(zhp, child[c], depth + 2, max, name_flags)) > max) max = ret; } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { for (c = 0; c < children; c++) if ((ret = max_width(zhp, child[c], depth + 2, max, name_flags)) > max) max = ret; } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if ((ret = max_width(zhp, child[c], depth + 2, max, name_flags)) > max) max = ret; } return (max); } typedef struct spare_cbdata { uint64_t cb_guid; zpool_handle_t *cb_zhp; } spare_cbdata_t; static boolean_t find_vdev(nvlist_t *nv, uint64_t search) { uint64_t guid; nvlist_t **child; uint_t c, children; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && search == guid) return (B_TRUE); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if (find_vdev(child[c], search)) return (B_TRUE); } return (B_FALSE); } static int find_spare(zpool_handle_t *zhp, void *data) { spare_cbdata_t *cbp = data; nvlist_t *config, *nvroot; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (find_vdev(nvroot, cbp->cb_guid)) { cbp->cb_zhp = zhp; return (1); } zpool_close(zhp); return (0); } typedef struct status_cbdata { int cb_count; int cb_name_flags; int cb_namewidth; boolean_t cb_allpools; boolean_t cb_verbose; boolean_t cb_literal; boolean_t cb_explain; boolean_t cb_first; boolean_t cb_dedup_stats; boolean_t cb_print_status; boolean_t cb_print_slow_ios; boolean_t 
cb_print_vdev_init; boolean_t cb_print_vdev_trim; vdev_cmd_data_list_t *vcdl; + boolean_t cb_print_power; } status_cbdata_t; /* Return 1 if string is NULL, empty, or whitespace; return 0 otherwise. */ static int is_blank_str(char *str) { while (str != NULL && *str != '\0') { if (!isblank(*str)) return (0); str++; } return (1); } /* Print command output lines for specific vdev in a specific pool */ static void zpool_print_cmd(vdev_cmd_data_list_t *vcdl, const char *pool, char *path) { vdev_cmd_data_t *data; int i, j; char *val; for (i = 0; i < vcdl->count; i++) { if ((strcmp(vcdl->data[i].path, path) != 0) || (strcmp(vcdl->data[i].pool, pool) != 0)) { /* Not the vdev we're looking for */ continue; } data = &vcdl->data[i]; /* Print out all the output values for this vdev */ for (j = 0; j < vcdl->uniq_cols_cnt; j++) { val = NULL; /* Does this vdev have values for this column? */ for (int k = 0; k < data->cols_cnt; k++) { if (strcmp(data->cols[k], vcdl->uniq_cols[j]) == 0) { /* yes it does, record the value */ val = data->lines[k]; break; } } /* * Mark empty values with dashes to make output * awk-able. */ if (val == NULL || is_blank_str(val)) val = "-"; printf("%*s", vcdl->uniq_cols_width[j], val); if (j < vcdl->uniq_cols_cnt - 1) printf(" "); } /* Print out any values that aren't in a column at the end */ for (j = data->cols_cnt; j < data->lines_cnt; j++) { /* Did we have any columns? If so print a spacer. */ if (vcdl->uniq_cols_cnt > 0) printf(" "); val = data->lines[j]; printf("%s", val ? 
val : ""); } break; } } /* * Print vdev initialization status for leaves */ static void print_status_initialize(vdev_stat_t *vs, boolean_t verbose) { if (verbose) { if ((vs->vs_initialize_state == VDEV_INITIALIZE_ACTIVE || vs->vs_initialize_state == VDEV_INITIALIZE_SUSPENDED || vs->vs_initialize_state == VDEV_INITIALIZE_COMPLETE) && !vs->vs_scan_removing) { char zbuf[1024]; char tbuf[256]; struct tm zaction_ts; time_t t = vs->vs_initialize_action_time; int initialize_pct = 100; if (vs->vs_initialize_state != VDEV_INITIALIZE_COMPLETE) { initialize_pct = (vs->vs_initialize_bytes_done * 100 / (vs->vs_initialize_bytes_est + 1)); } (void) localtime_r(&t, &zaction_ts); (void) strftime(tbuf, sizeof (tbuf), "%c", &zaction_ts); switch (vs->vs_initialize_state) { case VDEV_INITIALIZE_SUSPENDED: (void) snprintf(zbuf, sizeof (zbuf), ", %s %s", gettext("suspended, started at"), tbuf); break; case VDEV_INITIALIZE_ACTIVE: (void) snprintf(zbuf, sizeof (zbuf), ", %s %s", gettext("started at"), tbuf); break; case VDEV_INITIALIZE_COMPLETE: (void) snprintf(zbuf, sizeof (zbuf), ", %s %s", gettext("completed at"), tbuf); break; } (void) printf(gettext(" (%d%% initialized%s)"), initialize_pct, zbuf); } else { (void) printf(gettext(" (uninitialized)")); } } else if (vs->vs_initialize_state == VDEV_INITIALIZE_ACTIVE) { (void) printf(gettext(" (initializing)")); } } /* * Print vdev TRIM status for leaves */ static void print_status_trim(vdev_stat_t *vs, boolean_t verbose) { if (verbose) { if ((vs->vs_trim_state == VDEV_TRIM_ACTIVE || vs->vs_trim_state == VDEV_TRIM_SUSPENDED || vs->vs_trim_state == VDEV_TRIM_COMPLETE) && !vs->vs_scan_removing) { char zbuf[1024]; char tbuf[256]; struct tm zaction_ts; time_t t = vs->vs_trim_action_time; int trim_pct = 100; if (vs->vs_trim_state != VDEV_TRIM_COMPLETE) { trim_pct = (vs->vs_trim_bytes_done * 100 / (vs->vs_trim_bytes_est + 1)); } (void) localtime_r(&t, &zaction_ts); (void) strftime(tbuf, sizeof (tbuf), "%c", &zaction_ts); switch 
(vs->vs_trim_state) { case VDEV_TRIM_SUSPENDED: (void) snprintf(zbuf, sizeof (zbuf), ", %s %s", gettext("suspended, started at"), tbuf); break; case VDEV_TRIM_ACTIVE: (void) snprintf(zbuf, sizeof (zbuf), ", %s %s", gettext("started at"), tbuf); break; case VDEV_TRIM_COMPLETE: (void) snprintf(zbuf, sizeof (zbuf), ", %s %s", gettext("completed at"), tbuf); break; } (void) printf(gettext(" (%d%% trimmed%s)"), trim_pct, zbuf); } else if (vs->vs_trim_notsup) { (void) printf(gettext(" (trim unsupported)")); } else { (void) printf(gettext(" (untrimmed)")); } } else if (vs->vs_trim_state == VDEV_TRIM_ACTIVE) { (void) printf(gettext(" (trimming)")); } } /* * Return the color associated with a health string. This includes returning * NULL for no color change. */ static char * health_str_to_color(const char *health) { if (strcmp(health, gettext("FAULTED")) == 0 || strcmp(health, gettext("SUSPENDED")) == 0 || strcmp(health, gettext("UNAVAIL")) == 0) { return (ANSI_RED); } if (strcmp(health, gettext("OFFLINE")) == 0 || strcmp(health, gettext("DEGRADED")) == 0 || strcmp(health, gettext("REMOVED")) == 0) { return (ANSI_YELLOW); } return (NULL); } /* * Print out configuration state as requested by status_callback. 
*/
/*
 * print_status_config() prints one status line for the vdev 'nv' and then
 * recurses into its children:
 *
 *	zhp	pool the vdev belongs to
 *	cb	display options (name width, literal numbers, optional
 *		slow-I/O, power, init/trim and command-output columns)
 *	name	name to print for this vdev
 *	depth	indentation of this line; children are printed at depth + 2
 *	isspare	B_TRUE when printing hot spares: the state is shown as
 *		INUSE/AVAIL and the error columns are omitted
 *	vrs	rebuild stats inherited from an ancestor, or NULL
 *
 * Indirect vdevs are skipped.  Log, hole and allocation-class children are
 * not printed from here.
 */
static void
print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
    nvlist_t *nv, int depth, boolean_t isspare, vdev_rebuild_stat_t *vrs)
{
	nvlist_t **child, *root;
	uint_t c, i, vsc, children;
	pool_scan_stat_t *ps = NULL;
	vdev_stat_t *vs;
	char rbuf[6], wbuf[6], cbuf[6];
	char *vname;
	uint64_t notpresent;
	spare_cbdata_t spare_cb;
	const char *state;
	char *type;
	char *path = NULL;
	char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL;

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		children = 0;

	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &vsc) == 0);

	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);

	if (strcmp(type, VDEV_TYPE_INDIRECT) == 0)
		return;

	state = zpool_state_to_name(vs->vs_state, vs->vs_aux);

	if (isspare) {
		/*
		 * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
		 * online drives.
		 */
		if (vs->vs_aux == VDEV_AUX_SPARED)
			state = gettext("INUSE");
		else if (vs->vs_state == VDEV_STATE_HEALTHY)
			state = gettext("AVAIL");
	}

	printf_color(health_str_to_color(state),
	    "\t%*s%-*s %-8s", depth, "", cb->cb_namewidth - depth,
	    name, state);

	if (!isspare) {
		/* Non-zero error counters are highlighted in red. */
		if (vs->vs_read_errors)
			rcolor = ANSI_RED;

		if (vs->vs_write_errors)
			wcolor = ANSI_RED;

		if (vs->vs_checksum_errors)
			ccolor = ANSI_RED;

		if (cb->cb_literal) {
			printf(" ");
			printf_color(rcolor, "%5llu",
			    (u_longlong_t)vs->vs_read_errors);
			printf(" ");
			printf_color(wcolor, "%5llu",
			    (u_longlong_t)vs->vs_write_errors);
			printf(" ");
			printf_color(ccolor, "%5llu",
			    (u_longlong_t)vs->vs_checksum_errors);
		} else {
			zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
			zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
			zfs_nicenum(vs->vs_checksum_errors, cbuf,
			    sizeof (cbuf));
			printf(" ");
			printf_color(rcolor, "%5s", rbuf);
			printf(" ");
			printf_color(wcolor, "%5s", wbuf);
			printf(" ");
			printf_color(ccolor, "%5s", cbuf);
		}
		if (cb->cb_print_slow_ios) {
			if (children == 0) {
				/* Only leafs vdevs have slow IOs */
				zfs_nicenum(vs->vs_slow_ios, rbuf,
				    sizeof (rbuf));
			} else {
				snprintf(rbuf, sizeof (rbuf), "-");
			}

			if (cb->cb_literal)
				printf(" %5llu", (u_longlong_t)vs->vs_slow_ios);
			else
				printf(" %5s", rbuf);
		}
		if (cb->cb_print_power) {
			if (children == 0) {
				/*
				 * Only leaf vdevs have physical slots.
				 * NOTE(review): fnvlist_lookup_string() is
				 * used here, so this assumes every leaf has
				 * a path entry -- confirm.
				 */
				switch (zpool_power_current_state(zhp, (char *)
				    fnvlist_lookup_string(nv,
				    ZPOOL_CONFIG_PATH))) {
				case 0:
					printf_color(ANSI_RED, " %5s",
					    gettext("off"));
					break;
				case 1:
					printf(" %5s", gettext("on"));
					break;
				default:
					printf(" %5s", "-");
					break;
				}
			} else {
				printf(" %5s", "-");
			}
		}
	}

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
	    &notpresent) == 0) {
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
		(void) printf(" %s %s", gettext("was"), path);
	} else if (vs->vs_aux != 0) {
		(void) printf(" ");
		color_start(ANSI_RED);
		switch (vs->vs_aux) {
		case VDEV_AUX_OPEN_FAILED:
			(void) printf(gettext("cannot open"));
			break;

		case VDEV_AUX_BAD_GUID_SUM:
			(void) printf(gettext("missing device"));
			break;

		case VDEV_AUX_NO_REPLICAS:
			(void) printf(gettext("insufficient replicas"));
			break;

		case VDEV_AUX_VERSION_NEWER:
			(void) printf(gettext("newer version"));
			break;

		case VDEV_AUX_UNSUP_FEAT:
			(void) printf(gettext("unsupported feature(s)"));
			break;

		case VDEV_AUX_ASHIFT_TOO_BIG:
			(void) printf(gettext("unsupported minimum blocksize"));
			break;

		case VDEV_AUX_SPARED:
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &spare_cb.cb_guid) == 0);
			/* find_spare() leaves the matching pool open. */
			if (zpool_iter(g_zfs, find_spare, &spare_cb) == 1) {
				if (strcmp(zpool_get_name(spare_cb.cb_zhp),
				    zpool_get_name(zhp)) == 0)
					(void) printf(gettext("currently in "
					    "use"));
				else
					(void) printf(gettext("in use by "
					    "pool '%s'"),
					    zpool_get_name(spare_cb.cb_zhp));
				zpool_close(spare_cb.cb_zhp);
			} else {
				(void) printf(gettext("currently in use"));
			}
			break;

		case VDEV_AUX_ERR_EXCEEDED:
			(void) printf(gettext("too many errors"));
			break;

		case VDEV_AUX_IO_FAILURE:
			(void) printf(gettext("experienced I/O failures"));
			break;

		case VDEV_AUX_BAD_LOG:
			(void) printf(gettext("bad intent log"));
			break;

		case VDEV_AUX_EXTERNAL:
			(void) printf(gettext("external device fault"));
			break;

		case VDEV_AUX_SPLIT_POOL:
			(void) printf(gettext("split into new pool"));
			break;

		case VDEV_AUX_ACTIVE:
			(void) printf(gettext("currently in use"));
			break;

		case VDEV_AUX_CHILDREN_OFFLINE:
			(void) printf(gettext("all children offline"));
			break;

		case VDEV_AUX_BAD_LABEL:
			(void) printf(gettext("invalid label"));
			break;

		default:
			(void) printf(gettext("corrupted data"));
			break;
		}
		color_end();
	} else if (children == 0 && !isspare &&
	    getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") == NULL &&
	    VDEV_STAT_VALID(vs_physical_ashift, vsc) &&
	    vs->vs_configured_ashift < vs->vs_physical_ashift) {
		(void) printf(
		    gettext(" block size: %dB configured, %dB native"),
		    1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift);
	}

	/* The root vdev has the scrub/resilver stats */
	root = fnvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
	    ZPOOL_CONFIG_VDEV_TREE);
	(void) nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_SCAN_STATS,
	    (uint64_t **)&ps, &c);

	/*
	 * If you force fault a drive that's resilvering, its scan stats can
	 * get frozen in time, giving the false impression that it's
	 * being resilvered.  That's why we check the state to see if the vdev
	 * is healthy before reporting "resilvering" or "repairing".
	 */
	if (ps != NULL && ps->pss_state == DSS_SCANNING && children == 0 &&
	    vs->vs_state == VDEV_STATE_HEALTHY) {
		if (vs->vs_scan_processed != 0) {
			(void) printf(gettext(" (%s)"),
			    (ps->pss_func == POOL_SCAN_RESILVER) ?
			    "resilvering" : "repairing");
		} else if (vs->vs_resilver_deferred) {
			(void) printf(gettext(" (awaiting resilver)"));
		}
	}

	/* The top-level vdevs have the rebuild stats */
	if (vrs != NULL && vrs->vrs_state == VDEV_REBUILD_ACTIVE &&
	    children == 0 && vs->vs_state == VDEV_STATE_HEALTHY) {
		if (vs->vs_rebuild_processed != 0) {
			(void) printf(gettext(" (resilvering)"));
		}
	}

	if (cb->vcdl != NULL) {
		if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
			printf(" ");
			zpool_print_cmd(cb->vcdl, zpool_get_name(zhp), path);
		}
	}

	/* Display vdev initialization and trim status for leaves. */
	if (children == 0) {
		print_status_initialize(vs, cb->cb_print_vdev_init);
		print_status_trim(vs, cb->cb_print_vdev_trim);
	}

	(void) printf("\n");

	for (c = 0; c < children; c++) {
		uint64_t islog = B_FALSE, ishole = B_FALSE;

		/* Don't print logs or holes here */
		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
		    &islog);
		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
		    &ishole);
		if (islog || ishole)
			continue;
		/* Only print normal classes here */
		if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS))
			continue;

		/* Provide vdev_rebuild_stats to children if available */
		if (vrs == NULL) {
			(void) nvlist_lookup_uint64_array(nv,
			    ZPOOL_CONFIG_REBUILD_STATS,
			    (uint64_t **)&vrs, &i);
		}

		vname = zpool_vdev_name(g_zfs, zhp, child[c],
		    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
		print_status_config(zhp, cb, vname, child[c], depth + 2,
		    isspare, vrs);
		free(vname);
	}
}

/*
 * Print the configuration of an exported pool.  Iterate over all vdevs in the
 * pool, printing out the name and status for each one.
*/ static void print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv, int depth) { nvlist_t **child; uint_t c, children; vdev_stat_t *vs; char *type, *vname; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_MISSING) == 0 || strcmp(type, VDEV_TYPE_HOLE) == 0) return; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); (void) printf("\t%*s%-*s", depth, "", cb->cb_namewidth - depth, name); (void) printf(" %s", zpool_state_to_name(vs->vs_state, vs->vs_aux)); if (vs->vs_aux != 0) { (void) printf(" "); switch (vs->vs_aux) { case VDEV_AUX_OPEN_FAILED: (void) printf(gettext("cannot open")); break; case VDEV_AUX_BAD_GUID_SUM: (void) printf(gettext("missing device")); break; case VDEV_AUX_NO_REPLICAS: (void) printf(gettext("insufficient replicas")); break; case VDEV_AUX_VERSION_NEWER: (void) printf(gettext("newer version")); break; case VDEV_AUX_UNSUP_FEAT: (void) printf(gettext("unsupported feature(s)")); break; case VDEV_AUX_ERR_EXCEEDED: (void) printf(gettext("too many errors")); break; case VDEV_AUX_ACTIVE: (void) printf(gettext("currently in use")); break; case VDEV_AUX_CHILDREN_OFFLINE: (void) printf(gettext("all children offline")); break; case VDEV_AUX_BAD_LABEL: (void) printf(gettext("invalid label")); break; default: (void) printf(gettext("corrupted data")); break; } } (void) printf("\n"); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; for (c = 0; c < children; c++) { uint64_t is_log = B_FALSE; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log); if (is_log) continue; if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS)) continue; vname = zpool_vdev_name(g_zfs, NULL, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); print_import_config(cb, vname, child[c], depth + 2); free(vname); } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { (void) printf(gettext("\tcache\n")); 
for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, NULL, child[c], cb->cb_name_flags); (void) printf("\t %s\n", vname); free(vname); } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { (void) printf(gettext("\tspares\n")); for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, NULL, child[c], cb->cb_name_flags); (void) printf("\t %s\n", vname); free(vname); } } } /* * Print specialized class vdevs. * * These are recorded as top level vdevs in the main pool child array * but with "is_log" set to 1 or an "alloc_bias" string. We use either * print_status_config() or print_import_config() to print the top level * class vdevs then any of their children (eg mirrored slogs) are printed * recursively - which works because only the top level vdev is marked. */ static void print_class_vdevs(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv, const char *class) { uint_t c, children; nvlist_t **child; boolean_t printed = B_FALSE; assert(zhp != NULL || !cb->cb_verbose); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; for (c = 0; c < children; c++) { uint64_t is_log = B_FALSE; char *bias = NULL; char *type = NULL; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log); if (is_log) { bias = VDEV_ALLOC_CLASS_LOGS; } else { (void) nvlist_lookup_string(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS, &bias); (void) nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type); } if (bias == NULL || strcmp(bias, class) != 0) continue; if (!is_log && strcmp(type, VDEV_TYPE_INDIRECT) == 0) continue; if (!printed) { (void) printf("\t%s\t\n", gettext(class)); printed = B_TRUE; } char *name = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); if (cb->cb_print_status) print_status_config(zhp, cb, name, child[c], 2, B_FALSE, NULL); else print_import_config(cb, name, child[c], 2); free(name); } } /* * Display the status for the given pool. 
*/
/*
 * 'config' is the nvlist describing one import candidate.  When
 * 'report_error' is false and the import status is anything other than
 * ZPOOL_STATUS_OK, nothing is printed and that status value is returned;
 * otherwise the pool summary, status, suggested action and configuration
 * tree are printed and 0 is returned.
 */
static int
show_import(nvlist_t *config, boolean_t report_error)
{
	uint64_t pool_state;
	vdev_stat_t *vs;
	char *name;
	uint64_t guid;
	uint64_t hostid = 0;
	char *msgid;
	char *hostname = "unknown";
	nvlist_t *nvroot, *nvinfo;
	zpool_status_t reason;
	zpool_errata_t errata;
	const char *health;
	uint_t vsc;
	char *comment;
	status_cbdata_t cb = { 0 };

	/* These entries are mandatory; verify() aborts if any is missing. */
	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &name) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &guid) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &pool_state) == 0);
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);

	/* The root vdev's stats give the overall health of the pool. */
	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &vsc) == 0);
	health = zpool_state_to_name(vs->vs_state, vs->vs_aux);

	reason = zpool_import_status(config, &msgid, &errata);

	/*
	 * If we're importing using a cachefile, then we won't report any
	 * errors unless we are in the scan phase of the import.
	 */
	if (reason != ZPOOL_STATUS_OK && !report_error)
		return (reason);

	(void) printf(gettext(" pool: %s\n"), name);
	(void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid);
	(void) printf(gettext(" state: %s"), health);
	if (pool_state == POOL_STATE_DESTROYED)
		(void) printf(gettext(" (DESTROYED)"));
	(void) printf("\n");

	/* One "status:" paragraph per import status. */
	switch (reason) {
	case ZPOOL_STATUS_MISSING_DEV_R:
	case ZPOOL_STATUS_MISSING_DEV_NR:
	case ZPOOL_STATUS_BAD_GUID_SUM:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("One or more devices are "
		    "missing from the system.\n"));
		break;

	case ZPOOL_STATUS_CORRUPT_LABEL_R:
	case ZPOOL_STATUS_CORRUPT_LABEL_NR:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("One or more devices contains"
		    " corrupted data.\n"));
		break;

	case ZPOOL_STATUS_CORRUPT_DATA:
		(void) printf(
		    gettext(" status: The pool data is corrupted.\n"));
		break;

	case ZPOOL_STATUS_OFFLINE_DEV:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("One or more devices "
		    "are offlined.\n"));
		break;

	case ZPOOL_STATUS_CORRUPT_POOL:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("The pool metadata is "
		    "corrupted.\n"));
		break;

	case ZPOOL_STATUS_VERSION_OLDER:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("The pool is formatted using "
		    "a legacy on-disk version.\n"));
		break;

	case ZPOOL_STATUS_VERSION_NEWER:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("The pool is formatted using "
		    "an incompatible version.\n"));
		break;

	case ZPOOL_STATUS_FEAT_DISABLED:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("Some supported "
		    "features are not enabled on the pool.\n\t"
		    "(Note that they may be intentionally disabled "
		    "if the\n\t'compatibility' property is set.)\n"));
		break;

	case ZPOOL_STATUS_COMPATIBILITY_ERR:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("Error reading or parsing "
		    "the file(s) indicated by the 'compatibility'\n"
		    "property.\n"));
		break;

	case ZPOOL_STATUS_INCOMPATIBLE_FEAT:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("One or more features "
		    "are enabled on the pool despite not being\n"
		    "requested by the 'compatibility' property.\n"));
		break;

	case ZPOOL_STATUS_UNSUP_FEAT_READ:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("The pool uses the following "
		    "feature(s) not supported on this system:\n"));
		/* The feature list itself is printed in yellow as well. */
		color_start(ANSI_YELLOW);
		zpool_print_unsup_feat(config);
		color_end();
		break;

	case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("The pool can only be "
		    "accessed in read-only mode on this system. It\n\tcannot be"
		    " accessed in read-write mode because it uses the "
		    "following\n\tfeature(s) not supported on this system:\n"));
		color_start(ANSI_YELLOW);
		zpool_print_unsup_feat(config);
		color_end();
		break;

	case ZPOOL_STATUS_HOSTID_ACTIVE:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("The pool is currently "
		    "imported by another system.\n"));
		break;

	case ZPOOL_STATUS_HOSTID_REQUIRED:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("The pool has the "
		    "multihost property on. It cannot\n\tbe safely imported "
		    "when the system hostid is not set.\n"));
		break;

	case ZPOOL_STATUS_HOSTID_MISMATCH:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("The pool was last accessed "
		    "by another system.\n"));
		break;

	case ZPOOL_STATUS_FAULTED_DEV_R:
	case ZPOOL_STATUS_FAULTED_DEV_NR:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("One or more devices are "
		    "faulted.\n"));
		break;

	case ZPOOL_STATUS_BAD_LOG:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("An intent log record cannot "
		    "be read.\n"));
		break;

	case ZPOOL_STATUS_RESILVERING:
	case ZPOOL_STATUS_REBUILDING:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("One or more devices were "
		    "being resilvered.\n"));
		break;

	case ZPOOL_STATUS_ERRATA:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("Errata #%d detected.\n"),
		    errata);
		break;

	case ZPOOL_STATUS_NON_NATIVE_ASHIFT:
		printf_color(ANSI_BOLD, gettext("status: "));
		printf_color(ANSI_YELLOW, gettext("One or more devices are "
		    "configured to use a non-native block size.\n"
		    "\tExpect reduced performance.\n"));
		break;

	default:
		/*
		 * No other status can be seen when importing pools.
		 */
		assert(reason == ZPOOL_STATUS_OK);
	}

	/*
	 * Print out an action according to the overall state of the pool.
	 */
	if (vs->vs_state == VDEV_STATE_HEALTHY) {
		/* Healthy: importable, possibly with a caveat. */
		if (reason == ZPOOL_STATUS_VERSION_OLDER ||
		    reason == ZPOOL_STATUS_FEAT_DISABLED) {
			(void) printf(gettext(" action: The pool can be "
			    "imported using its name or numeric identifier, "
			    "though\n\tsome features will not be available "
			    "without an explicit 'zpool upgrade'.\n"));
		} else if (reason == ZPOOL_STATUS_COMPATIBILITY_ERR) {
			(void) printf(gettext(" action: The pool can be "
			    "imported using its name or numeric\n\tidentifier, "
			    "though the file(s) indicated by its "
			    "'compatibility'\n\tproperty cannot be parsed at "
			    "this time.\n"));
		} else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) {
			(void) printf(gettext(" action: The pool can be "
			    "imported using its name or numeric "
			    "identifier and\n\tthe '-f' flag.\n"));
		} else if (reason == ZPOOL_STATUS_ERRATA) {
			switch (errata) {
			case ZPOOL_ERRATA_NONE:
				break;

			case ZPOOL_ERRATA_ZOL_2094_SCRUB:
				(void) printf(gettext(" action: The pool can "
				    "be imported using its name or numeric "
				    "identifier,\n\thowever there is a compat"
				    "ibility issue which should be corrected"
				    "\n\tby running 'zpool scrub'\n"));
				break;

			case ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY:
				(void) printf(gettext(" action: The pool can"
				    "not be imported with this version of ZFS "
				    "due to\n\tan active asynchronous destroy. "
				    "Revert to an earlier version\n\tand "
				    "allow the destroy to complete before "
				    "updating.\n"));
				break;

			case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION:
				(void) printf(gettext(" action: Existing "
				    "encrypted datasets contain an on-disk "
				    "incompatibility, which\n\tneeds to be "
				    "corrected. Backup these datasets to new "
				    "encrypted datasets\n\tand destroy the "
				    "old ones.\n"));
				break;

			case ZPOOL_ERRATA_ZOL_8308_ENCRYPTION:
				(void) printf(gettext(" action: Existing "
				    "encrypted snapshots and bookmarks contain "
				    "an on-disk\n\tincompatibility. This may "
				    "cause on-disk corruption if they are used"
				    "\n\twith 'zfs recv'. To correct the "
				    "issue, enable the bookmark_v2 feature.\n\t"
				    "No additional action is needed if there "
				    "are no encrypted snapshots or\n\t"
				    "bookmarks. If preserving the encrypted "
				    "snapshots and bookmarks is\n\trequired, "
				    "use a non-raw send to backup and restore "
				    "them. Alternately,\n\tthey may be removed"
				    " to resolve the incompatibility.\n"));
				break;

			default:
				/*
				 * All errata must contain an action message.
				 */
				assert(0);
			}
		} else {
			(void) printf(gettext(" action: The pool can be "
			    "imported using its name or numeric "
			    "identifier.\n"));
		}
	} else if (vs->vs_state == VDEV_STATE_DEGRADED) {
		(void) printf(gettext(" action: The pool can be imported "
		    "despite missing or damaged devices. The\n\tfault "
		    "tolerance of the pool may be compromised if imported.\n"));
	} else {
		/* Neither healthy nor degraded: explain why import fails. */
		switch (reason) {
		case ZPOOL_STATUS_VERSION_NEWER:
			(void) printf(gettext(" action: The pool cannot be "
			    "imported. Access the pool on a system running "
			    "newer\n\tsoftware, or recreate the pool from "
			    "backup.\n"));
			break;

		case ZPOOL_STATUS_UNSUP_FEAT_READ:
			printf_color(ANSI_BOLD, gettext("action: "));
			printf_color(ANSI_YELLOW, gettext("The pool cannot be "
			    "imported. Access the pool on a system that "
			    "supports\n\tthe required feature(s), or recreate "
			    "the pool from backup.\n"));
			break;

		case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
			printf_color(ANSI_BOLD, gettext("action: "));
			printf_color(ANSI_YELLOW, gettext("The pool cannot be "
			    "imported in read-write mode. Import the pool "
			    "with\n"
			    "\t\"-o readonly=on\", access the pool on a system "
			    "that supports the\n\trequired feature(s), or "
			    "recreate the pool from backup.\n"));
			break;

		case ZPOOL_STATUS_MISSING_DEV_R:
		case ZPOOL_STATUS_MISSING_DEV_NR:
		case ZPOOL_STATUS_BAD_GUID_SUM:
			(void) printf(gettext(" action: The pool cannot be "
			    "imported. Attach the missing\n\tdevices and try "
			    "again.\n"));
			break;

		case ZPOOL_STATUS_HOSTID_ACTIVE:
			/*
			 * Name the host that holds the pool; the defaults
			 * ("unknown", 0) are used when the load info lacks
			 * the MMP hostname or hostid.
			 */
			VERIFY0(nvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo));

			if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME))
				hostname = fnvlist_lookup_string(nvinfo,
				    ZPOOL_CONFIG_MMP_HOSTNAME);

			if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID))
				hostid = fnvlist_lookup_uint64(nvinfo,
				    ZPOOL_CONFIG_MMP_HOSTID);

			(void) printf(gettext(" action: The pool must be "
			    "exported from %s (hostid=%lx)\n\tbefore it "
			    "can be safely imported.\n"), hostname,
			    (unsigned long) hostid);
			break;

		case ZPOOL_STATUS_HOSTID_REQUIRED:
			(void) printf(gettext(" action: Set a unique system "
			    "hostid with the zgenhostid(8) command.\n"));
			break;

		default:
			(void) printf(gettext(" action: The pool cannot be "
			    "imported due to damaged devices or data.\n"));
		}
	}

	/* Print the comment attached to the pool. */
	if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0)
		(void) printf(gettext("comment: %s\n"), comment);

	/*
	 * If the state is "closed" or "can't open", and the aux state
	 * is "corrupt data":
	 */
	if (((vs->vs_state == VDEV_STATE_CLOSED) ||
	    (vs->vs_state == VDEV_STATE_CANT_OPEN)) &&
	    (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)) {
		if (pool_state == POOL_STATE_DESTROYED)
			(void) printf(gettext("\tThe pool was destroyed, "
			    "but can be imported using the '-Df' flags.\n"));
		else if (pool_state != POOL_STATE_EXPORTED)
			(void) printf(gettext("\tThe pool may be active on "
			    "another system, but can be imported using\n\t"
			    "the '-f' flag.\n"));
	}

	/* Point at the online documentation for this message id, if any. */
	if (msgid != NULL) {
		(void) printf(gettext(
		    " see: https://openzfs.github.io/openzfs-docs/msg/%s\n"),
		    msgid);
	}

	(void) printf(gettext(" config:\n\n"));

	/* The name column is at least 10 characters wide. */
	cb.cb_namewidth = max_width(NULL, nvroot, 0, strlen(name),
	    VDEV_NAME_TYPE_ID);
	if (cb.cb_namewidth < 10)
		cb.cb_namewidth = 10;

	/* Ordinary vdevs first, then each special class under its heading. */
	print_import_config(&cb, name, nvroot, 0);

	print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_DEDUP);
	print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_SPECIAL);
	print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_CLASS_LOGS);

	if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
		(void) printf(gettext("\n\tAdditional devices are known to "
		    "be part of this pool, though their\n\texact "
		    "configuration cannot be determined.\n"));
	}
	return (0);
}

/*
 * Decide whether importing 'config' needs the force flag.  Returns B_TRUE
 * when the pool state is not "exported" and its recorded hostid differs
 * from this system's hostid, or when the load info carries an MMP state
 * other than MMP_STATE_INACTIVE; returns B_FALSE otherwise.
 */
static boolean_t
zfs_force_import_required(nvlist_t *config)
{
	uint64_t state;
	uint64_t hostid = 0;
	nvlist_t *nvinfo;

	state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
	/* hostid stays 0 when the config does not record one. */
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);

	if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid())
		return (B_TRUE);

	nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
	if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) {
		mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo,
		    ZPOOL_CONFIG_MMP_STATE);

		if (mmp_state != MMP_STATE_INACTIVE)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Perform the import for the given configuration. This passes the heavy
 * lifting off to zpool_import_props(), and then mounts the datasets contained
 * within the pool.
*/ static int do_import(nvlist_t *config, const char *newname, const char *mntopts, nvlist_t *props, int flags) { int ret = 0; zpool_handle_t *zhp; char *name; uint64_t version; name = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME); version = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION); if (!SPA_VERSION_IS_SUPPORTED(version)) { (void) fprintf(stderr, gettext("cannot import '%s': pool " "is formatted using an unsupported ZFS version\n"), name); return (1); } else if (zfs_force_import_required(config) && !(flags & ZFS_IMPORT_ANY_HOST)) { mmp_state_t mmp_state = MMP_STATE_INACTIVE; nvlist_t *nvinfo; nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) mmp_state = fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_STATE); if (mmp_state == MMP_STATE_ACTIVE) { char *hostname = ""; uint64_t hostid = 0; if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME)) hostname = fnvlist_lookup_string(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME); if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID)) hostid = fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_HOSTID); (void) fprintf(stderr, gettext("cannot import '%s': " "pool is imported on %s (hostid: " "0x%lx)\nExport the pool on the other system, " "then run 'zpool import'.\n"), name, hostname, (unsigned long) hostid); } else if (mmp_state == MMP_STATE_NO_HOSTID) { (void) fprintf(stderr, gettext("Cannot import '%s': " "pool has the multihost property on and the\n" "system's hostid is not set. 
Set a unique hostid " "with the zgenhostid(8) command.\n"), name); } else { char *hostname = ""; uint64_t timestamp = 0; uint64_t hostid = 0; if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME)) hostname = fnvlist_lookup_string(config, ZPOOL_CONFIG_HOSTNAME); if (nvlist_exists(config, ZPOOL_CONFIG_TIMESTAMP)) timestamp = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP); if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID)) hostid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID); (void) fprintf(stderr, gettext("cannot import '%s': " "pool was previously in use from another system.\n" "Last accessed by %s (hostid=%lx) at %s" "The pool can be imported, use 'zpool import -f' " "to import the pool.\n"), name, hostname, (unsigned long)hostid, ctime((time_t *)×tamp)); } return (1); } if (zpool_import_props(g_zfs, config, newname, props, flags) != 0) return (1); if (newname != NULL) name = (char *)newname; if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) return (1); /* * Loading keys is best effort. We don't want to return immediately * if it fails but we do want to give the error to the caller. */ if (flags & ZFS_IMPORT_LOAD_KEYS) { ret = zfs_crypto_attempt_load_keys(g_zfs, name); if (ret != 0) ret = 1; } if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && !(flags & ZFS_IMPORT_ONLY) && zpool_enable_datasets(zhp, mntopts, 0) != 0) { zpool_close(zhp); return (1); } zpool_close(zhp); return (ret); } static int import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, char *orig_name, char *new_name, boolean_t do_destroyed, boolean_t pool_specified, boolean_t do_all, importargs_t *import) { nvlist_t *config = NULL; nvlist_t *found_config = NULL; uint64_t pool_state; /* * At this point we have a list of import candidate configs. Even if * we were searching by pool name or guid, we still need to * post-process the list to deal with pool state and possible * duplicate names. 
*/ int err = 0; nvpair_t *elem = NULL; boolean_t first = B_TRUE; while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { verify(nvpair_value_nvlist(elem, &config) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &pool_state) == 0); if (!do_destroyed && pool_state == POOL_STATE_DESTROYED) continue; if (do_destroyed && pool_state != POOL_STATE_DESTROYED) continue; verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY, import->policy) == 0); if (!pool_specified) { if (first) first = B_FALSE; else if (!do_all) (void) printf("\n"); if (do_all) { err |= do_import(config, NULL, mntopts, props, flags); } else { /* * If we're importing from cachefile, then * we don't want to report errors until we * are in the scan phase of the import. If * we get an error, then we return that error * to invoke the scan phase. */ if (import->cachefile && !import->scan) err = show_import(config, B_FALSE); else (void) show_import(config, B_TRUE); } } else if (import->poolname != NULL) { char *name; /* * We are searching for a pool based on name. */ verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); if (strcmp(name, import->poolname) == 0) { if (found_config != NULL) { (void) fprintf(stderr, gettext( "cannot import '%s': more than " "one matching pool\n"), import->poolname); (void) fprintf(stderr, gettext( "import by numeric ID instead\n")); err = B_TRUE; } found_config = config; } } else { uint64_t guid; /* * Search for a pool by guid. */ verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); if (guid == import->guid) found_config = config; } } /* * If we were searching for a specific pool, verify that we found a * pool, and then do the import. 
*/ if (pool_specified && err == 0) { if (found_config == NULL) { (void) fprintf(stderr, gettext("cannot import '%s': " "no such pool available\n"), orig_name); err = B_TRUE; } else { err |= do_import(found_config, new_name, mntopts, props, flags); } } /* * If we were just looking for pools, report an error if none were * found. */ if (!pool_specified && first) (void) fprintf(stderr, gettext("no pools available to import\n")); return (err); } typedef struct target_exists_args { const char *poolname; uint64_t poolguid; } target_exists_args_t; static int name_or_guid_exists(zpool_handle_t *zhp, void *data) { target_exists_args_t *args = data; nvlist_t *config = zpool_get_config(zhp, NULL); int found = 0; if (config == NULL) return (0); if (args->poolname != NULL) { char *pool_name; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0); if (strcmp(pool_name, args->poolname) == 0) found = 1; } else { uint64_t pool_guid; verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0); if (pool_guid == args->poolguid) found = 1; } zpool_close(zhp); return (found); } /* * zpool checkpoint * checkpoint --discard * * -d Discard the checkpoint from a checkpointed * --discard pool. * * -w Wait for discarding a checkpoint to complete. * --wait * * Checkpoints the specified pool, by taking a "snapshot" of its * current state. A pool can only have one checkpoint at a time. 
*/ int zpool_do_checkpoint(int argc, char **argv) { boolean_t discard, wait; char *pool; zpool_handle_t *zhp; int c, err; struct option long_options[] = { {"discard", no_argument, NULL, 'd'}, {"wait", no_argument, NULL, 'w'}, {0, 0, 0, 0} }; discard = B_FALSE; wait = B_FALSE; while ((c = getopt_long(argc, argv, ":dw", long_options, NULL)) != -1) { switch (c) { case 'd': discard = B_TRUE; break; case 'w': wait = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (wait && !discard) { (void) fprintf(stderr, gettext("--wait only valid when " "--discard also specified\n")); usage(B_FALSE); } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing pool argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } pool = argv[0]; if ((zhp = zpool_open(g_zfs, pool)) == NULL) { /* As a special case, check for use of '/' in the name */ if (strchr(pool, '/') != NULL) (void) fprintf(stderr, gettext("'zpool checkpoint' " "doesn't work on datasets. To save the state " "of a dataset from a specific point in time " "please use 'zfs snapshot'\n")); return (1); } if (discard) { err = (zpool_discard_checkpoint(zhp) != 0); if (err == 0 && wait) err = zpool_wait(zhp, ZPOOL_WAIT_CKPT_DISCARD); } else { err = (zpool_checkpoint(zhp) != 0); } zpool_close(zhp); return (err); } #define CHECKPOINT_OPT 1024 /* * zpool import [-d dir] [-D] * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l] * [-d dir | -c cachefile | -s] [-f] -a * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l] * [-d dir | -c cachefile | -s] [-f] [-n] [-F] * [newpool] * * -c Read pool information from a cachefile instead of searching * devices. If importing from a cachefile config fails, then * fallback to searching for devices only in the directories that * exist in the cachefile. * * -d Scan in a specific directory, other than /dev/. 
More than * one directory can be specified using multiple '-d' options. * * -D Scan for previously destroyed pools or import all or only * specified destroyed pools. * * -R Temporarily import the pool, with all mountpoints relative to * the given root. The pool will remain exported when the machine * is rebooted. * * -V Import even in the presence of faulted vdevs. This is an * intentionally undocumented option for testing purposes, and * treats the pool configuration as complete, leaving any bad * vdevs in the FAULTED state. In other words, it does verbatim * import. * * -f Force import, even if it appears that the pool is active. * * -F Attempt rewind if necessary. * * -n See if rewind would work, but don't actually rewind. * * -N Import the pool but don't mount datasets. * * -T Specify a starting txg to use for import. This option is * intentionally undocumented option for testing purposes. * * -a Import all pools found. * * -l Load encryption keys while importing. * * -o Set property=value and/or temporary mount options (without '='). * * -s Scan using the default search path, the libblkid cache will * not be consulted. * * --rewind-to-checkpoint * Import the pool and revert back to the checkpoint. * * The import command scans for pools to import, and import pools based on pool * name and GUID. The pool can also be renamed as part of the import process. 
*/
/*
 * Entry point for 'zpool import'.  Returns 0 on success and 1 on failure.
 * A failure while processing '-o', '-R' or '-t', or while building the load
 * policy, is reported as 1 rather than falling through the cleanup path
 * with a zero status.
 */
int
zpool_do_import(int argc, char **argv)
{
	char **searchdirs = NULL;
	char *env, *envdup = NULL;
	int nsearch = 0;
	int c;
	int err = 0;
	nvlist_t *pools = NULL;
	boolean_t do_all = B_FALSE;
	boolean_t do_destroyed = B_FALSE;
	char *mntopts = NULL;
	uint64_t searchguid = 0;
	char *searchname = NULL;
	char *propval;
	nvlist_t *policy = NULL;
	nvlist_t *props = NULL;
	int flags = ZFS_IMPORT_NORMAL;
	uint32_t rewind_policy = ZPOOL_NO_REWIND;
	boolean_t dryrun = B_FALSE;
	boolean_t do_rewind = B_FALSE;
	boolean_t xtreme_rewind = B_FALSE;
	boolean_t do_scan = B_FALSE;
	boolean_t pool_exists = B_FALSE;
	boolean_t pool_specified = B_FALSE;
	uint64_t txg = -1ULL;
	char *cachefile = NULL;
	importargs_t idata = { 0 };
	char *endptr;

	struct option long_options[] = {
		{"rewind-to-checkpoint", no_argument, NULL, CHECKPOINT_OPT},
		{0, 0, 0, 0}
	};

	/* check options */
	while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX",
	    long_options, NULL)) != -1) {
		switch (c) {
		case 'a':
			do_all = B_TRUE;
			break;
		case 'c':
			cachefile = optarg;
			break;
		case 'd':
			/* Grow the search directory array by one entry. */
			if (searchdirs == NULL) {
				searchdirs = safe_malloc(sizeof (char *));
			} else {
				char **tmp = safe_malloc((nsearch + 1) *
				    sizeof (char *));
				bcopy(searchdirs, tmp, nsearch *
				    sizeof (char *));
				free(searchdirs);
				searchdirs = tmp;
			}
			searchdirs[nsearch++] = optarg;
			break;
		case 'D':
			do_destroyed = B_TRUE;
			break;
		case 'f':
			flags |= ZFS_IMPORT_ANY_HOST;
			break;
		case 'F':
			do_rewind = B_TRUE;
			break;
		case 'l':
			flags |= ZFS_IMPORT_LOAD_KEYS;
			break;
		case 'm':
			flags |= ZFS_IMPORT_MISSING_LOG;
			break;
		case 'n':
			dryrun = B_TRUE;
			break;
		case 'N':
			flags |= ZFS_IMPORT_ONLY;
			break;
		case 'o':
			/* "name=value" is a property, anything else mntopts. */
			if ((propval = strchr(optarg, '=')) != NULL) {
				*propval = '\0';
				propval++;
				if (add_prop_list(optarg, propval,
				    &props, B_TRUE)) {
					err = 1;
					goto error;
				}
			} else {
				mntopts = optarg;
			}
			break;
		case 'R':
			/* An alternate root also defaults cachefile to none. */
			if (add_prop_list(zpool_prop_to_name(
			    ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE)) {
				err = 1;
				goto error;
			}
			if (add_prop_list_default(zpool_prop_to_name(
			    ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) {
				err = 1;
				goto error;
			}
			break;
		case 's':
			do_scan = B_TRUE;
			break;
		case 't':
			flags |= ZFS_IMPORT_TEMP_NAME;
			if (add_prop_list_default(zpool_prop_to_name(
			    ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) {
				err = 1;
				goto error;
			}
			break;

		case 'T':
			errno = 0;
			txg = strtoull(optarg, &endptr, 0);
			if (errno != 0 || *endptr != '\0') {
				(void) fprintf(stderr,
				    gettext("invalid txg value\n"));
				usage(B_FALSE);
			}
			rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND;
			break;
		case 'V':
			flags |= ZFS_IMPORT_VERBATIM;
			break;
		case 'X':
			xtreme_rewind = B_TRUE;
			break;
		case CHECKPOINT_OPT:
			flags |= ZFS_IMPORT_CHECKPOINT;
			break;
		case ':':
			(void) fprintf(stderr, gettext("missing argument for "
			    "'%c' option\n"), optopt);
			usage(B_FALSE);
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	/* Reject option combinations that contradict each other. */
	if (cachefile && nsearch != 0) {
		(void) fprintf(stderr, gettext("-c is incompatible with -d\n"));
		usage(B_FALSE);
	}

	if (cachefile && do_scan) {
		(void) fprintf(stderr, gettext("-c is incompatible with -s\n"));
		usage(B_FALSE);
	}

	if ((flags & ZFS_IMPORT_LOAD_KEYS) && (flags & ZFS_IMPORT_ONLY)) {
		(void) fprintf(stderr, gettext("-l is incompatible with -N\n"));
		usage(B_FALSE);
	}

	if ((flags & ZFS_IMPORT_LOAD_KEYS) && !do_all && argc == 0) {
		(void) fprintf(stderr, gettext("-l is only meaningful during "
		    "an import\n"));
		usage(B_FALSE);
	}

	if ((dryrun || xtreme_rewind) && !do_rewind) {
		(void) fprintf(stderr,
		    gettext("-n or -X only meaningful with -F\n"));
		usage(B_FALSE);
	}
	if (dryrun)
		rewind_policy = ZPOOL_TRY_REWIND;
	else if (do_rewind)
		rewind_policy = ZPOOL_DO_REWIND;
	if (xtreme_rewind)
		rewind_policy |= ZPOOL_EXTREME_REWIND;

	/* In the future, we can capture further policy and include it here */
	if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 ||
	    nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, txg) != 0 ||
	    nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY,
	    rewind_policy) != 0) {
		err = 1;
		goto error;
	}

	/* check argument count */
	if (do_all) {
		if (argc != 0) {
			(void) fprintf(stderr, gettext("too many arguments\n"));
			usage(B_FALSE);
		}
	} else {
		if (argc > 2) {
			(void) fprintf(stderr, gettext("too many arguments\n"));
			usage(B_FALSE);
		}
	}

	/*
	 * Check for the effective uid.  We do this explicitly here because
	 * otherwise any attempt to discover pools will silently fail.
	 */
	if (argc == 0 && geteuid() != 0) {
		(void) fprintf(stderr, gettext("cannot "
		    "discover pools: permission denied\n"));

		free(searchdirs);
		nvlist_free(props);
		nvlist_free(policy);
		return (1);
	}

	/*
	 * Depending on the arguments given, we do one of the following:
	 *
	 *	<none>	Iterate through all pools and display information about
	 *		each one.
	 *
	 *	-a	Iterate through all pools and try to import each one.
	 *
	 *	<id>	Find the pool that corresponds to the given GUID/pool
	 *		name and import that one.
	 *
	 *	-D	Above options applies only to destroyed pools.
	 */
	if (argc != 0) {
		/* An argument that parses fully as decimal is a guid. */
		errno = 0;
		searchguid = strtoull(argv[0], &endptr, 10);
		if (errno != 0 || *endptr != '\0') {
			searchname = argv[0];
			searchguid = 0;
		}
		pool_specified = B_TRUE;

		/*
		 * User specified a name or guid.  Ensure it's unique.
		 */
		target_exists_args_t search = {searchname, searchguid};
		pool_exists = zpool_iter(g_zfs, name_or_guid_exists, &search);
	}

	/*
	 * Check the environment for the preferred search path.
	 */
	if ((searchdirs == NULL) && (env = getenv("ZPOOL_IMPORT_PATH"))) {
		char *dir;

		/* strtok() modifies its input, so work on a private copy. */
		envdup = strdup(env);

		dir = strtok(envdup, ":");
		while (dir != NULL) {
			if (searchdirs == NULL) {
				searchdirs = safe_malloc(sizeof (char *));
			} else {
				char **tmp = safe_malloc((nsearch + 1) *
				    sizeof (char *));
				bcopy(searchdirs, tmp, nsearch *
				    sizeof (char *));
				free(searchdirs);
				searchdirs = tmp;
			}
			searchdirs[nsearch++] = dir;
			dir = strtok(NULL, ":");
		}
	}

	idata.path = searchdirs;
	idata.paths = nsearch;
	idata.poolname = searchname;
	idata.guid = searchguid;
	idata.cachefile = cachefile;
	idata.scan = do_scan;
	idata.policy = policy;

	pools = zpool_search_import(g_zfs, &idata, &libzfs_config_ops);

	if (pools != NULL && pool_exists &&
	    (argc == 1 || strcmp(argv[0], argv[1]) == 0)) {
		/* Importing would clash with a pool that is already here. */
		(void) fprintf(stderr, gettext("cannot import '%s': "
		    "a pool with that name already exists\n"),
		    argv[0]);
		(void) fprintf(stderr, gettext("use the form '%s "
		    "<pool | id> <newpool>' to give it a new name\n"),
		    "zpool import");
		err = 1;
	} else if (pools == NULL && pool_exists) {
		(void) fprintf(stderr, gettext("cannot import '%s': "
		    "a pool with that name is already created/imported,\n"),
		    argv[0]);
		(void) fprintf(stderr, gettext("and no additional pools "
		    "with that name were found\n"));
		err = 1;
	} else if (pools == NULL) {
		if (argc != 0) {
			(void) fprintf(stderr, gettext("cannot import '%s': "
			    "no such pool available\n"), argv[0]);
		}
		err = 1;
	}

	if (err == 1) {
		free(searchdirs);
		free(envdup);
		nvlist_free(policy);
		nvlist_free(pools);
		nvlist_free(props);
		return (1);
	}

	err = import_pools(pools, props, mntopts, flags,
	    argc >= 1 ? argv[0] : NULL,
	    argc >= 2 ? argv[1] : NULL,
	    do_destroyed, pool_specified, do_all, &idata);

	/*
	 * If we're using the cachefile and we failed to import, then
	 * fallback to scanning the directory for pools that match
	 * those in the cachefile.
	 */
	if (err != 0 && cachefile != NULL) {
		(void) printf(gettext("cachefile import failed, retrying\n"));

		/*
		 * We use the scan flag to gather the directories that exist
		 * in the cachefile. If we need to fallback to searching for
		 * the pool config, we will only search devices in these
		 * directories.
		 */
		idata.scan = B_TRUE;
		nvlist_free(pools);
		pools = zpool_search_import(g_zfs, &idata, &libzfs_config_ops);

		err = import_pools(pools, props, mntopts, flags,
		    argc >= 1 ? argv[0] : NULL,
		    argc >= 2 ? argv[1] : NULL,
		    do_destroyed, pool_specified, do_all, &idata);
	}

error:
	/* Shared cleanup; every pointer here is either valid or NULL. */
	nvlist_free(props);
	nvlist_free(pools);
	nvlist_free(policy);
	free(searchdirs);
	free(envdup);

	return (err ? 1 : 0);
}

/*
 * zpool sync [-f] [pool] ...
 *
 * -f (undocumented) force uberblock (and config including zpool cache file)
 *    update.
 *
 * Sync the specified pool(s).
 * Without arguments "zpool sync" will sync all pools.
 * This command initiates TXG sync(s) and will return after the TXG(s) commit.
*
 */
static int
zpool_do_sync(int argc, char **argv)
{
	int ret;
	boolean_t force = B_FALSE;

	/* check options */
	while ((ret = getopt(argc, argv, "f")) != -1) {
		switch (ret) {
		case 'f':
			force = B_TRUE;
			break;
		case '?':
			/* Unknown option: report it and show the usage text. */
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	/* Skip past the options; what remains is the list of pool names. */
	argc -= optind;
	argv += optind;

	/* if argc == 0 we will execute zpool_sync_one on all pools */
	ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE, zpool_sync_one,
	    &force);

	return (ret);
}

/*
 * State shared by the iostat printing callbacks in this file.
 */
typedef struct iostat_cbdata {
	uint64_t cb_flags;	/* Bitmask of stat groups (IOS_*_M) to print */
	int cb_name_flags;	/* Passed to zpool_vdev_name() */
	int cb_namewidth;	/* Width of the pool/vdev name column */
	int cb_iteration;
	char **cb_vdev_names; /* Only show these vdevs */
	unsigned int cb_vdev_names_count;
	boolean_t cb_verbose;	/* Recurse into child vdevs */
	boolean_t cb_literal;	/* Print raw numbers, not "nice" ones */
	boolean_t cb_scripted;	/* Tab-separated output, no headers */
	zpool_list_t *cb_list;
	vdev_cmd_data_list_t *vcdl;	/* "-c" command output, or NULL */
} iostat_cbdata_t;

/* iostat labels */
typedef struct name_and_columns {
	const char *name;	/* Column name */
	unsigned int columns;	/* Center name to this number of columns */
} name_and_columns_t;

#define	IOSTAT_MAX_LABELS	13	/* Max number of labels on one line */

/*
 * Top header row, indexed by stat type.  Each row is terminated by an entry
 * whose name is NULL.
 */
static const name_and_columns_t iostat_top_labels[][IOSTAT_MAX_LABELS] =
{
	[IOS_DEFAULT] = {{"capacity", 2}, {"operations", 2}, {"bandwidth", 2},
	    {NULL}},
	[IOS_LATENCY] = {{"total_wait", 2}, {"disk_wait", 2}, {"syncq_wait", 2},
	    {"asyncq_wait", 2}, {"scrub", 1}, {"trim", 1}, {NULL}},
	[IOS_QUEUES] = {{"syncq_read", 2}, {"syncq_write", 2},
	    {"asyncq_read", 2}, {"asyncq_write", 2}, {"scrubq_read", 2},
	    {"trimq_write", 2}, {NULL}},
	[IOS_L_HISTO] = {{"total_wait", 2}, {"disk_wait", 2}, {"syncq_wait", 2},
	    {"asyncq_wait", 2}, {NULL}},
	[IOS_RQ_HISTO] = {{"sync_read", 2}, {"sync_write", 2},
	    {"async_read", 2}, {"async_write", 2}, {"scrub", 2},
	    {"trim", 2}, {NULL}},
};

/* Shorthand - if "columns" field not set, default to 1 column */
static const name_and_columns_t iostat_bottom_labels[][IOSTAT_MAX_LABELS] =
{
	[IOS_DEFAULT] = {{"alloc"}, {"free"}, {"read"}, {"write"}, {"read"},
	    {"write"}, {NULL}},
	[IOS_LATENCY] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
	    {"write"}, {"read"}, {"write"}, {"wait"}, {"wait"}, {NULL}},
	[IOS_QUEUES] = {{"pend"}, {"activ"}, {"pend"}, {"activ"}, {"pend"},
	    {"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"},
	    {"pend"}, {"activ"}, {NULL}},
	[IOS_L_HISTO] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
	    {"write"}, {"read"}, {"write"}, {"scrub"}, {"trim"}, {NULL}},
	[IOS_RQ_HISTO] = {{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"},
	    {"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"}, {NULL}},
};

/* Title of the name column when a histogram is being printed. */
static const char *histo_to_title[] = {
	[IOS_L_HISTO] = "latency",
	[IOS_RQ_HISTO] = "req_size",
};

/*
 * Return the number of labels in a null-terminated name_and_columns_t
 * array.  The terminating entry (name == NULL) is not counted.
 */
static unsigned int
label_array_len(const name_and_columns_t *labels)
{
	int i = 0;

	while (labels[i].name)
		i++;

	return (i);
}

/*
 * Return the number of strings in a null-terminated string array.
 * For example:
 *
 *     const char *foo[] = {"bar", "baz", NULL}
 *
 * returns 2
 */
static uint64_t
str_array_len(const char *array[])
{
	uint64_t i = 0;
	while (array[i])
		i++;

	return (i);
}

/*
 * Return a default column width for default/latency/queue columns. This does
 * not include histograms, which have their columns autosized.
 *
 * The per-type table is only consulted when cb->cb_literal is set; otherwise
 * the width is always 5.
 */
static unsigned int
default_column_width(iostat_cbdata_t *cb, enum iostat_type type)
{
	unsigned long column_width = 5; /* Normal niceprint */
	static unsigned long widths[] = {
		/*
		 * Choose some sane default column sizes for printing the
		 * raw numbers.
		 */
		[IOS_DEFAULT] = 15, /* 1PB capacity */
		[IOS_LATENCY] = 10, /* 1B ns = 10sec */
		[IOS_QUEUES] = 6,   /* 1M queue entries */
		[IOS_L_HISTO] = 10, /* 1B ns = 10sec */
		[IOS_RQ_HISTO] = 6, /* 1M queue entries */
	};

	if (cb->cb_literal)
		column_width = widths[type];

	return (column_width);
}

/*
 * Print the column labels, i.e:
 *
 * capacity operations bandwidth
 * alloc free read write read write ...
 *
 * If force_column_width is set, use it for the column width. If not set, use
 * the default column width.
*/
static void
print_iostat_labels(iostat_cbdata_t *cb, unsigned int force_column_width,
    const name_and_columns_t labels[][IOSTAT_MAX_LABELS])
{
	uint64_t remaining;
	int idx;

	/* Visit every stat type whose bit is set, lowest bit first. */
	for (remaining = cb->cb_flags; remaining != 0;
	    remaining &= ~(1ULL << idx)) {
		unsigned int column_width = force_column_width;
		unsigned int nlabels;
		unsigned int n;

		idx = lowbit64(remaining) - 1;
		if (column_width == 0)
			column_width = default_column_width(cb, idx);

		/* The row is fixed while we print it; count it only once. */
		nlabels = label_array_len(labels[idx]);

		for (n = 0; n < nlabels; n++) {
			const name_and_columns_t *label = &labels[idx][n];
			/* A "columns" value of 0 is shorthand for 1. */
			unsigned int columns = MAX(1, label->columns);
			unsigned int slen = strlen(label->name);
			int span_width, lead, trail;

			/*
			 * Total width covered by this label: its columns
			 * plus the gaps between them.
			 *
			 * NOTE(review): this allows two characters per gap
			 * while the separator printed below is a single
			 * space -- confirm which one is intended.
			 */
			span_width = (column_width * columns) +
			    (2 * (columns - 1));

			/* Blanks before the label, never negative. */
			lead = (int)((span_width) / columns - slen / columns);
			if (lead < 0)
				lead = 0;

			/* Blanks after the label up to the end of the span. */
			trail = span_width - lead - slen;
			if (trail < 0)
				trail = 0;

			/* separator, left fill, label, right fill */
			printf(" ");
			printf("%*s", lead, "");
			printf("%s", label->name);
			printf("%*s", trail, "");
		}
	}
}

/*
 * print_cmd_columns - Print custom column titles from -c
 *
 * If the user specified the "zpool status|iostat -c" then print their custom
 * column titles in the header. For example, print_cmd_columns() would print
 * the " col1 col2" part of this:
 *
 * $ zpool iostat -vc 'echo col1=val1; echo col2=val2'
 * ...
* capacity operations bandwidth * pool alloc free read write read write col1 col2 * ---------- ----- ----- ----- ----- ----- ----- ---- ---- * mypool 269K 1008M 0 0 107 946 * mirror 269K 1008M 0 0 107 946 * sdb - - 0 0 102 473 val1 val2 * sdc - - 0 0 5 473 val1 val2 * ---------- ----- ----- ----- ----- ----- ----- ---- ---- */ static void print_cmd_columns(vdev_cmd_data_list_t *vcdl, int use_dashes) { int i, j; vdev_cmd_data_t *data = &vcdl->data[0]; if (vcdl->count == 0 || data == NULL) return; /* * Each vdev cmd should have the same column names unless the user did * something weird with their cmd. Just take the column names from the * first vdev and assume it works for all of them. */ for (i = 0; i < vcdl->uniq_cols_cnt; i++) { printf(" "); if (use_dashes) { for (j = 0; j < vcdl->uniq_cols_width[i]; j++) printf("-"); } else { printf_color(ANSI_BOLD, "%*s", vcdl->uniq_cols_width[i], vcdl->uniq_cols[i]); } } } /* * Utility function to print out a line of dashes like: * * -------------------------------- ----- ----- ----- ----- ----- * * ...or a dashed named-row line like: * * logs - - - - - * * @cb: iostat data * * @force_column_width If non-zero, use the value as the column width. * Otherwise use the default column widths. * * @name: Print a dashed named-row line starting * with @name. Otherwise, print a regular * dashed line. */ static void print_iostat_dashes(iostat_cbdata_t *cb, unsigned int force_column_width, const char *name) { int i; unsigned int namewidth; uint64_t flags = cb->cb_flags; uint64_t f; int idx; const name_and_columns_t *labels; const char *title; if (cb->cb_flags & IOS_ANYHISTO_M) { title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)]; } else if (cb->cb_vdev_names_count) { title = "vdev"; } else { title = "pool"; } namewidth = MAX(MAX(strlen(title), cb->cb_namewidth), name ? 
strlen(name) : 0); if (name) { printf("%-*s", namewidth, name); } else { for (i = 0; i < namewidth; i++) (void) printf("-"); } /* For each bit in flags */ for (f = flags; f; f &= ~(1ULL << idx)) { unsigned int column_width; idx = lowbit64(f) - 1; if (force_column_width) column_width = force_column_width; else column_width = default_column_width(cb, idx); labels = iostat_bottom_labels[idx]; for (i = 0; i < label_array_len(labels); i++) { if (name) printf(" %*s-", column_width - 1, " "); else printf(" %.*s", column_width, "--------------------"); } } } static void print_iostat_separator_impl(iostat_cbdata_t *cb, unsigned int force_column_width) { print_iostat_dashes(cb, force_column_width, NULL); } static void print_iostat_separator(iostat_cbdata_t *cb) { print_iostat_separator_impl(cb, 0); } static void print_iostat_header_impl(iostat_cbdata_t *cb, unsigned int force_column_width, const char *histo_vdev_name) { unsigned int namewidth; const char *title; color_start(ANSI_BOLD); if (cb->cb_flags & IOS_ANYHISTO_M) { title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)]; } else if (cb->cb_vdev_names_count) { title = "vdev"; } else { title = "pool"; } namewidth = MAX(MAX(strlen(title), cb->cb_namewidth), histo_vdev_name ? strlen(histo_vdev_name) : 0); if (histo_vdev_name) printf("%-*s", namewidth, histo_vdev_name); else printf("%*s", namewidth, ""); print_iostat_labels(cb, force_column_width, iostat_top_labels); printf("\n"); printf("%-*s", namewidth, title); print_iostat_labels(cb, force_column_width, iostat_bottom_labels); if (cb->vcdl != NULL) print_cmd_columns(cb->vcdl, 0); printf("\n"); print_iostat_separator_impl(cb, force_column_width); if (cb->vcdl != NULL) print_cmd_columns(cb->vcdl, 1); color_end(); printf("\n"); } static void print_iostat_header(iostat_cbdata_t *cb) { print_iostat_header_impl(cb, 0, NULL); } /* * Prints a size string (i.e. 120M) with the suffix ("M") colored * by order of magnitude. Uses column_size to add padding. 
*/ static void print_stat_color(const char *statbuf, unsigned int column_size) { fputs(" ", stdout); size_t len = strlen(statbuf); while (len < column_size) { fputc(' ', stdout); column_size--; } if (*statbuf == '0') { color_start(ANSI_GRAY); fputc('0', stdout); } else { for (; *statbuf; statbuf++) { if (*statbuf == 'K') color_start(ANSI_GREEN); else if (*statbuf == 'M') color_start(ANSI_YELLOW); else if (*statbuf == 'G') color_start(ANSI_RED); else if (*statbuf == 'T') color_start(ANSI_BOLD_BLUE); else if (*statbuf == 'P') color_start(ANSI_MAGENTA); else if (*statbuf == 'E') color_start(ANSI_CYAN); fputc(*statbuf, stdout); if (--column_size <= 0) break; } } color_end(); } /* * Display a single statistic. */ static void print_one_stat(uint64_t value, enum zfs_nicenum_format format, unsigned int column_size, boolean_t scripted) { char buf[64]; zfs_nicenum_format(value, buf, sizeof (buf), format); if (scripted) printf("\t%s", buf); else print_stat_color(buf, column_size); } /* * Calculate the default vdev stats * * Subtract oldvs from newvs, apply a scaling factor, and save the resulting * stats into calcvs. */ static void calc_default_iostats(vdev_stat_t *oldvs, vdev_stat_t *newvs, vdev_stat_t *calcvs) { int i; memcpy(calcvs, newvs, sizeof (*calcvs)); for (i = 0; i < ARRAY_SIZE(calcvs->vs_ops); i++) calcvs->vs_ops[i] = (newvs->vs_ops[i] - oldvs->vs_ops[i]); for (i = 0; i < ARRAY_SIZE(calcvs->vs_bytes); i++) calcvs->vs_bytes[i] = (newvs->vs_bytes[i] - oldvs->vs_bytes[i]); } /* * Internal representation of the extended iostats data. * * The extended iostat stats are exported in nvlists as either uint64_t arrays * or single uint64_t's. We make both look like arrays to make them easier * to process. In order to make single uint64_t's look like arrays, we set * __data to the stat data, and then set *data = &__data with count = 1. Then, * we can just use *data and count. 
*/ struct stat_array { uint64_t *data; uint_t count; /* Number of entries in data[] */ uint64_t __data; /* Only used when data is a single uint64_t */ }; static uint64_t stat_histo_max(struct stat_array *nva, unsigned int len) { uint64_t max = 0; int i; for (i = 0; i < len; i++) max = MAX(max, array64_max(nva[i].data, nva[i].count)); return (max); } /* * Helper function to lookup a uint64_t array or uint64_t value and store its * data as a stat_array. If the nvpair is a single uint64_t value, then we make * it look like a one element array to make it easier to process. */ static int nvpair64_to_stat_array(nvlist_t *nvl, const char *name, struct stat_array *nva) { nvpair_t *tmp; int ret; verify(nvlist_lookup_nvpair(nvl, name, &tmp) == 0); switch (nvpair_type(tmp)) { case DATA_TYPE_UINT64_ARRAY: ret = nvpair_value_uint64_array(tmp, &nva->data, &nva->count); break; case DATA_TYPE_UINT64: ret = nvpair_value_uint64(tmp, &nva->__data); nva->data = &nva->__data; nva->count = 1; break; default: /* Not a uint64_t */ ret = EINVAL; break; } return (ret); } /* * Given a list of nvlist names, look up the extended stats in newnv and oldnv, * subtract them, and return the results in a newly allocated stat_array. * You must free the returned array after you are done with it with * free_calc_stats(). * * Additionally, you can set "oldnv" to NULL if you simply want the newnv * values. 
*/ static struct stat_array * calc_and_alloc_stats_ex(const char **names, unsigned int len, nvlist_t *oldnv, nvlist_t *newnv) { nvlist_t *oldnvx = NULL, *newnvx; struct stat_array *oldnva, *newnva, *calcnva; int i, j; unsigned int alloc_size = (sizeof (struct stat_array)) * len; /* Extract our extended stats nvlist from the main list */ verify(nvlist_lookup_nvlist(newnv, ZPOOL_CONFIG_VDEV_STATS_EX, &newnvx) == 0); if (oldnv) { verify(nvlist_lookup_nvlist(oldnv, ZPOOL_CONFIG_VDEV_STATS_EX, &oldnvx) == 0); } newnva = safe_malloc(alloc_size); oldnva = safe_malloc(alloc_size); calcnva = safe_malloc(alloc_size); for (j = 0; j < len; j++) { verify(nvpair64_to_stat_array(newnvx, names[j], &newnva[j]) == 0); calcnva[j].count = newnva[j].count; alloc_size = calcnva[j].count * sizeof (calcnva[j].data[0]); calcnva[j].data = safe_malloc(alloc_size); memcpy(calcnva[j].data, newnva[j].data, alloc_size); if (oldnvx) { verify(nvpair64_to_stat_array(oldnvx, names[j], &oldnva[j]) == 0); for (i = 0; i < oldnva[j].count; i++) calcnva[j].data[i] -= oldnva[j].data[i]; } } free(newnva); free(oldnva); return (calcnva); } static void free_calc_stats(struct stat_array *nva, unsigned int len) { int i; for (i = 0; i < len; i++) free(nva[i].data); free(nva); } static void print_iostat_histo(struct stat_array *nva, unsigned int len, iostat_cbdata_t *cb, unsigned int column_width, unsigned int namewidth, double scale) { int i, j; char buf[6]; uint64_t val; enum zfs_nicenum_format format; unsigned int buckets; unsigned int start_bucket; if (cb->cb_literal) format = ZFS_NICENUM_RAW; else format = ZFS_NICENUM_1024; /* All these histos are the same size, so just use nva[0].count */ buckets = nva[0].count; if (cb->cb_flags & IOS_RQ_HISTO_M) { /* Start at 512 - req size should never be lower than this */ start_bucket = 9; } else { start_bucket = 0; } for (j = start_bucket; j < buckets; j++) { /* Print histogram bucket label */ if (cb->cb_flags & IOS_L_HISTO_M) { /* Ending range of this bucket */ val = 
(1UL << (j + 1)) - 1; zfs_nicetime(val, buf, sizeof (buf)); } else { /* Request size (starting range of bucket) */ val = (1UL << j); zfs_nicenum(val, buf, sizeof (buf)); } if (cb->cb_scripted) printf("%llu", (u_longlong_t)val); else printf("%-*s", namewidth, buf); /* Print the values on the line */ for (i = 0; i < len; i++) { print_one_stat(nva[i].data[j] * scale, format, column_width, cb->cb_scripted); } printf("\n"); } } static void print_solid_separator(unsigned int length) { while (length--) printf("-"); printf("\n"); } static void print_iostat_histos(iostat_cbdata_t *cb, nvlist_t *oldnv, nvlist_t *newnv, double scale, const char *name) { unsigned int column_width; unsigned int namewidth; unsigned int entire_width; enum iostat_type type; struct stat_array *nva; const char **names; unsigned int names_len; /* What type of histo are we? */ type = IOS_HISTO_IDX(cb->cb_flags); /* Get NULL-terminated array of nvlist names for our histo */ names = vsx_type_to_nvlist[type]; names_len = str_array_len(names); /* num of names */ nva = calc_and_alloc_stats_ex(names, names_len, oldnv, newnv); if (cb->cb_literal) { column_width = MAX(5, (unsigned int) log10(stat_histo_max(nva, names_len)) + 1); } else { column_width = 5; } namewidth = MAX(cb->cb_namewidth, strlen(histo_to_title[IOS_HISTO_IDX(cb->cb_flags)])); /* * Calculate the entire line width of what we're printing. 
The * +2 is for the two spaces between columns: */ /* read write */ /* ----- ----- */ /* |___| <---------- column_width */ /* */ /* |__________| <--- entire_width */ /* */ entire_width = namewidth + (column_width + 2) * label_array_len(iostat_bottom_labels[type]); if (cb->cb_scripted) printf("%s\n", name); else print_iostat_header_impl(cb, column_width, name); print_iostat_histo(nva, names_len, cb, column_width, namewidth, scale); free_calc_stats(nva, names_len); if (!cb->cb_scripted) print_solid_separator(entire_width); } /* * Calculate the average latency of a power-of-two latency histogram */ static uint64_t single_histo_average(uint64_t *histo, unsigned int buckets) { int i; uint64_t count = 0, total = 0; for (i = 0; i < buckets; i++) { /* * Our buckets are power-of-two latency ranges. Use the * midpoint latency of each bucket to calculate the average. * For example: * * Bucket Midpoint * 8ns-15ns: 12ns * 16ns-31ns: 24ns * ... */ if (histo[i] != 0) { total += histo[i] * (((1UL << i) + ((1UL << i)/2))); count += histo[i]; } } /* Prevent divide by zero */ return (count == 0 ? 
0 : total / count); } static void print_iostat_queues(iostat_cbdata_t *cb, nvlist_t *oldnv, nvlist_t *newnv) { int i; uint64_t val; const char *names[] = { ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, ZPOOL_CONFIG_VDEV_TRIM_PEND_QUEUE, ZPOOL_CONFIG_VDEV_TRIM_ACTIVE_QUEUE, }; struct stat_array *nva; unsigned int column_width = default_column_width(cb, IOS_QUEUES); enum zfs_nicenum_format format; nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), NULL, newnv); if (cb->cb_literal) format = ZFS_NICENUM_RAW; else format = ZFS_NICENUM_1024; for (i = 0; i < ARRAY_SIZE(names); i++) { val = nva[i].data[0]; print_one_stat(val, format, column_width, cb->cb_scripted); } free_calc_stats(nva, ARRAY_SIZE(names)); } static void print_iostat_latency(iostat_cbdata_t *cb, nvlist_t *oldnv, nvlist_t *newnv) { int i; uint64_t val; const char *names[] = { ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, }; struct stat_array *nva; unsigned int column_width = default_column_width(cb, IOS_LATENCY); enum zfs_nicenum_format format; nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), oldnv, newnv); if (cb->cb_literal) format = ZFS_NICENUM_RAWTIME; else format = ZFS_NICENUM_TIME; /* Print our avg latencies on the line */ for (i = 0; i < ARRAY_SIZE(names); i++) { /* Compute average latency for a latency histo */ val = single_histo_average(nva[i].data, 
nva[i].count); print_one_stat(val, format, column_width, cb->cb_scripted); } free_calc_stats(nva, ARRAY_SIZE(names)); } /* * Print default statistics (capacity/operations/bandwidth) */ static void print_iostat_default(vdev_stat_t *vs, iostat_cbdata_t *cb, double scale) { unsigned int column_width = default_column_width(cb, IOS_DEFAULT); enum zfs_nicenum_format format; char na; /* char to print for "not applicable" values */ if (cb->cb_literal) { format = ZFS_NICENUM_RAW; na = '0'; } else { format = ZFS_NICENUM_1024; na = '-'; } /* only toplevel vdevs have capacity stats */ if (vs->vs_space == 0) { if (cb->cb_scripted) printf("\t%c\t%c", na, na); else printf(" %*c %*c", column_width, na, column_width, na); } else { print_one_stat(vs->vs_alloc, format, column_width, cb->cb_scripted); print_one_stat(vs->vs_space - vs->vs_alloc, format, column_width, cb->cb_scripted); } print_one_stat((uint64_t)(vs->vs_ops[ZIO_TYPE_READ] * scale), format, column_width, cb->cb_scripted); print_one_stat((uint64_t)(vs->vs_ops[ZIO_TYPE_WRITE] * scale), format, column_width, cb->cb_scripted); print_one_stat((uint64_t)(vs->vs_bytes[ZIO_TYPE_READ] * scale), format, column_width, cb->cb_scripted); print_one_stat((uint64_t)(vs->vs_bytes[ZIO_TYPE_WRITE] * scale), format, column_width, cb->cb_scripted); } static const char *class_name[] = { VDEV_ALLOC_BIAS_DEDUP, VDEV_ALLOC_BIAS_SPECIAL, VDEV_ALLOC_CLASS_LOGS }; /* * Print out all the statistics for the given vdev. This can either be the * toplevel configuration, or called recursively. If 'name' is NULL, then this * is a verbose output, and we don't want to display the toplevel pool stats. * * Returns the number of stat lines printed. 
*/
static unsigned int
print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
    nvlist_t *newnv, iostat_cbdata_t *cb, int depth)
{
	nvlist_t **oldchild, **newchild;
	uint_t c, children, oldchildren;
	vdev_stat_t *oldvs, *newvs, *calcvs;
	vdev_stat_t zerovs = { 0 };
	char *vname;
	int i;
	int ret = 0;
	uint64_t tdelta;
	double scale;

	/* Indirect vdevs are never shown. */
	if (strcmp(name, VDEV_TYPE_INDIRECT) == 0)
		return (ret);

	calcvs = safe_malloc(sizeof (*calcvs));

	/* With no previous sample, diff against all-zero stats. */
	if (oldnv != NULL) {
		verify(nvlist_lookup_uint64_array(oldnv,
		    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0);
	} else {
		oldvs = &zerovs;
	}

	/* Do we only want to see a specific vdev? */
	for (i = 0; i < cb->cb_vdev_names_count; i++) {
		/* Yes we do. Is this the vdev? */
		if (strcmp(name, cb->cb_vdev_names[i]) == 0) {
			/*
			 * This is our vdev. Since it is the only vdev we
			 * will be displaying, make depth = 0 so that it
			 * doesn't get indented.
			 */
			depth = 0;
			break;
		}
	}

	if (cb->cb_vdev_names_count &&
	    (i == cb->cb_vdev_names_count)) {
		/* Couldn't match the name */
		goto children;
	}

	verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&newvs, &c) == 0);

	/*
	 * Print the vdev name unless it is a histogram. Histograms
	 * display the vdev name in the header itself.
	 */
	if (!(cb->cb_flags & IOS_ANYHISTO_M)) {
		if (cb->cb_scripted) {
			printf("%s", name);
		} else {
			/* Indent by depth and pad out to the name column. */
			if (strlen(name) + depth > cb->cb_namewidth)
				(void) printf("%*s%s", depth, "", name);
			else
				(void) printf("%*s%s%*s", depth, "", name,
				    (int)(cb->cb_namewidth -
				    strlen(name) - depth), "");
		}
	}

	/* Calculate our scaling factor */
	tdelta = newvs->vs_timestamp - oldvs->vs_timestamp;
	if ((oldvs->vs_timestamp == 0) && (cb->cb_flags & IOS_ANYHISTO_M)) {
		/*
		 * If we specify printing histograms with no time interval, then
		 * print the histogram numbers over the entire lifetime of the
		 * vdev.
		 */
		scale = 1;
	} else {
		/* Scale the counters to a per-second rate. */
		if (tdelta == 0)
			scale = 1.0;
		else
			scale = (double)NANOSEC / tdelta;
	}

	if (cb->cb_flags & IOS_DEFAULT_M) {
		calc_default_iostats(oldvs, newvs, calcvs);
		print_iostat_default(calcvs, cb, scale);
	}
	if (cb->cb_flags & IOS_LATENCY_M)
		print_iostat_latency(cb, oldnv, newnv);
	if (cb->cb_flags & IOS_QUEUES_M)
		print_iostat_queues(cb, oldnv, newnv);
	if (cb->cb_flags & IOS_ANYHISTO_M) {
		printf("\n");
		print_iostat_histos(cb, oldnv, newnv, scale, name);
	}

	/* Append the "-c" command output for vdevs that have a path. */
	if (cb->vcdl != NULL) {
		char *path;
		if (nvlist_lookup_string(newnv, ZPOOL_CONFIG_PATH,
		    &path) == 0) {
			printf(" ");
			zpool_print_cmd(cb->vcdl, zpool_get_name(zhp), path);
		}
	}

	if (!(cb->cb_flags & IOS_ANYHISTO_M))
		printf("\n");

	ret++;

children:

	free(calcvs);

	/* Children are only shown in verbose mode. */
	if (!cb->cb_verbose)
		return (ret);

	if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN,
	    &newchild, &children) != 0)
		return (ret);

	if (oldnv) {
		if (nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_CHILDREN,
		    &oldchild, &oldchildren) != 0)
			return (ret);

		/* Only walk the children present in both samples. */
		children = MIN(oldchildren, children);
	}

	/*
	 * print normal top-level devices
	 */
	for (c = 0; c < children; c++) {
		uint64_t ishole = B_FALSE, islog = B_FALSE;

		(void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_HOLE,
		    &ishole);

		(void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_LOG,
		    &islog);

		if (ishole || islog)
			continue;

		/* Vdevs with an allocation bias are printed further down. */
		if (nvlist_exists(newchild[c], ZPOOL_CONFIG_ALLOCATION_BIAS))
			continue;

		vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
		    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
		ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
		    newchild[c], cb, depth + 2);
		free(vname);
	}

	/*
	 * print all other top-level devices
	 */
	for (uint_t n = 0; n < 3; n++) {
		/* Set once the section line for this class has been printed. */
		boolean_t printed = B_FALSE;

		for (c = 0; c < children; c++) {
			uint64_t islog = B_FALSE;
			char *bias = NULL;
			char *type = NULL;

			(void) nvlist_lookup_uint64(newchild[c],
			    ZPOOL_CONFIG_IS_LOG, &islog);
			if (islog) {
				bias = VDEV_ALLOC_CLASS_LOGS;
			} else {
				(void) nvlist_lookup_string(newchild[c],
				    ZPOOL_CONFIG_ALLOCATION_BIAS, &bias);
				(void) nvlist_lookup_string(newchild[c],
				    ZPOOL_CONFIG_TYPE, &type);
			}
			if (bias == NULL || strcmp(bias, class_name[n]) != 0)
				continue;
			/*
			 * NOTE(review): "type" stays NULL if the
			 * ZPOOL_CONFIG_TYPE lookup above fails -- confirm
			 * that every child carries a type.
			 */
			if (!islog && strcmp(type, VDEV_TYPE_INDIRECT) == 0)
				continue;

			if (!printed) {
				if ((!(cb->cb_flags & IOS_ANYHISTO_M)) &&
				    !cb->cb_scripted && !cb->cb_vdev_names) {
					print_iostat_dashes(cb, 0,
					    class_name[n]);
				}
				printf("\n");
				printed = B_TRUE;
			}

			vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
			    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
			ret += print_vdev_stats(zhp, vname, oldnv ?
			    oldchild[c] : NULL, newchild[c], cb, depth + 2);
			free(vname);
		}
	}

	/*
	 * Include level 2 ARC devices in iostat output
	 */
	if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE,
	    &newchild, &children) != 0)
		return (ret);

	if (oldnv) {
		if (nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_L2CACHE,
		    &oldchild, &oldchildren) != 0)
			return (ret);

		children = MIN(oldchildren, children);
	}

	if (children > 0) {
		if ((!(cb->cb_flags & IOS_ANYHISTO_M)) && !cb->cb_scripted &&
		    !cb->cb_vdev_names) {
			print_iostat_dashes(cb, 0, "cache");
		}
		printf("\n");

		for (c = 0; c < children; c++) {
			vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
			    cb->cb_name_flags);
			ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c]
			    : NULL, newchild[c], cb, depth + 2);
			free(vname);
		}
	}

	return (ret);
}

/*
 * Pool iteration callback: refresh the statistics of one pool.
 * Returns -1 if the refresh fails, 0 otherwise.
 */
static int
refresh_iostat(zpool_handle_t *zhp, void *data)
{
	iostat_cbdata_t *cb = data;
	boolean_t missing;

	/*
	 * If the pool has disappeared, remove it from the list and continue.
	 */
	if (zpool_refresh_stats(zhp, &missing) != 0)
		return (-1);

	if (missing)
		pool_list_remove(cb->cb_list, zhp);

	return (0);
}

/*
 * Callback to print out the iostats for the given pool.
 *
 * Returns the number of stat lines print_vdev_stats() printed.
 */
static int
print_iostat(zpool_handle_t *zhp, void *data)
{
	iostat_cbdata_t *cb = data;
	nvlist_t *oldconfig, *newconfig;
	nvlist_t *oldnvroot, *newnvroot;
	int ret;

	newconfig = zpool_get_config(zhp, &oldconfig);

	/* The first iteration has nothing to diff against. */
	if (cb->cb_iteration == 1)
		oldconfig = NULL;

	verify(nvlist_lookup_nvlist(newconfig, ZPOOL_CONFIG_VDEV_TREE,
	    &newnvroot) == 0);

	if (oldconfig == NULL)
		oldnvroot = NULL;
	else
		verify(nvlist_lookup_nvlist(oldconfig, ZPOOL_CONFIG_VDEV_TREE,
		    &oldnvroot) == 0);

	ret = print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot,
	    cb, 0);
	/* Close a verbose, non-histogram pool listing with a dashed line. */
	if ((ret != 0) && !(cb->cb_flags & IOS_ANYHISTO_M) &&
	    !cb->cb_scripted && cb->cb_verbose && !cb->cb_vdev_names_count) {
		print_iostat_separator(cb);
		if (cb->vcdl != NULL) {
			print_cmd_columns(cb->vcdl, 1);
		}
		printf("\n");
	}

	return (ret);
}

/*
 * Return the number of columns to format output for: the terminal width
 * when stdout is a TTY (80 if the width cannot be queried), 999 otherwise.
 */
static int
get_columns(void)
{
	struct winsize ws;
	int columns = 80;
	int error;

	if (isatty(STDOUT_FILENO)) {
		error = ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws);
		if (error == 0)
			columns = ws.ws_col;
	} else {
		columns = 999;
	}

	return (columns);
}

/*
 * Return the required length of the pool/vdev name column. The minimum
 * allowed width and output formatting flags must be provided.
 */
static int
get_namewidth(zpool_handle_t *zhp, int min_width, int flags, boolean_t verbose)
{
	nvlist_t *config, *nvroot;
	int width = min_width;

	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		unsigned int poolname_len = strlen(zpool_get_name(zhp));
		/* Vdev names only matter when they will be listed. */
		if (verbose == B_FALSE) {
			width = MAX(poolname_len, min_width);
		} else {
			width = MAX(poolname_len,
			    max_width(zhp, nvroot, 0, min_width, flags));
		}
	}

	return (width);
}

/*
 * Parse the input string, get the 'interval' and 'count' value if there is one.
*/ static void get_interval_count(int *argcp, char **argv, float *iv, unsigned long *cnt) { float interval = 0; unsigned long count = 0; int argc = *argcp; /* * Determine if the last argument is an integer or a pool name */ if (argc > 0 && zfs_isnumber(argv[argc - 1])) { char *end; errno = 0; interval = strtof(argv[argc - 1], &end); if (*end == '\0' && errno == 0) { if (interval == 0) { (void) fprintf(stderr, gettext( "interval cannot be zero\n")); usage(B_FALSE); } /* * Ignore the last parameter */ argc--; } else { /* * If this is not a valid number, just plow on. The * user will get a more informative error message later * on. */ interval = 0; } } /* * If the last argument is also an integer, then we have both a count * and an interval. */ if (argc > 0 && zfs_isnumber(argv[argc - 1])) { char *end; errno = 0; count = interval; interval = strtof(argv[argc - 1], &end); if (*end == '\0' && errno == 0) { if (interval == 0) { (void) fprintf(stderr, gettext( "interval cannot be zero\n")); usage(B_FALSE); } /* * Ignore the last parameter */ argc--; } else { interval = 0; } } *iv = interval; *cnt = count; *argcp = argc; } static void get_timestamp_arg(char c) { if (c == 'u') timestamp_fmt = UDATE; else if (c == 'd') timestamp_fmt = DDATE; else usage(B_FALSE); } /* * Return stat flags that are supported by all pools by both the module and * zpool iostat. "*data" should be initialized to all 0xFFs before running. * It will get ANDed down until only the flags that are supported on all pools * remain. */ static int get_stat_flags_cb(zpool_handle_t *zhp, void *data) { uint64_t *mask = data; nvlist_t *config, *nvroot, *nvx; uint64_t flags = 0; int i, j; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); /* Default stats are always supported, but for completeness.. 
*/ if (nvlist_exists(nvroot, ZPOOL_CONFIG_VDEV_STATS)) flags |= IOS_DEFAULT_M; /* Get our extended stats nvlist from the main list */ if (nvlist_lookup_nvlist(nvroot, ZPOOL_CONFIG_VDEV_STATS_EX, &nvx) != 0) { /* * No extended stats; they're probably running an older * module. No big deal, we support that too. */ goto end; } /* For each extended stat, make sure all its nvpairs are supported */ for (j = 0; j < ARRAY_SIZE(vsx_type_to_nvlist); j++) { if (!vsx_type_to_nvlist[j][0]) continue; /* Start off by assuming the flag is supported, then check */ flags |= (1ULL << j); for (i = 0; vsx_type_to_nvlist[j][i]; i++) { if (!nvlist_exists(nvx, vsx_type_to_nvlist[j][i])) { /* flag isn't supported */ flags = flags & ~(1ULL << j); break; } } } end: *mask = *mask & flags; return (0); } /* * Return a bitmask of stats that are supported on all pools by both the module * and zpool iostat. */ static uint64_t get_stat_flags(zpool_list_t *list) { uint64_t mask = -1; /* * get_stat_flags_cb() will lop off bits from "mask" until only the * flags that are supported on all pools remain. */ pool_list_iter(list, B_FALSE, get_stat_flags_cb, &mask); return (mask); } /* * Return 1 if cb_data->cb_vdev_names[0] is this vdev's name, 0 otherwise. */ static int is_vdev_cb(void *zhp_data, nvlist_t *nv, void *cb_data) { iostat_cbdata_t *cb = cb_data; char *name = NULL; int ret = 0; zpool_handle_t *zhp = zhp_data; name = zpool_vdev_name(g_zfs, zhp, nv, cb->cb_name_flags); if (strcmp(name, cb->cb_vdev_names[0]) == 0) ret = 1; /* match */ free(name); return (ret); } /* * Returns 1 if cb_data->cb_vdev_names[0] is a vdev name, 0 otherwise. */ static int is_vdev(zpool_handle_t *zhp, void *cb_data) { return (for_each_vdev(zhp, is_vdev_cb, cb_data)); } /* * Check if vdevs are in a pool * * Return 1 if all argv[] strings are vdev names in pool "pool_name". Otherwise * return 0. If pool_name is NULL, then search all pools. 
*/ static int are_vdevs_in_pool(int argc, char **argv, char *pool_name, iostat_cbdata_t *cb) { char **tmp_name; int ret = 0; int i; int pool_count = 0; if ((argc == 0) || !*argv) return (0); if (pool_name) pool_count = 1; /* Temporarily hijack cb_vdev_names for a second... */ tmp_name = cb->cb_vdev_names; /* Go though our list of prospective vdev names */ for (i = 0; i < argc; i++) { cb->cb_vdev_names = argv + i; /* Is this name a vdev in our pools? */ ret = for_each_pool(pool_count, &pool_name, B_TRUE, NULL, B_FALSE, is_vdev, cb); if (!ret) { /* No match */ break; } } cb->cb_vdev_names = tmp_name; return (ret); } static int is_pool_cb(zpool_handle_t *zhp, void *data) { char *name = data; if (strcmp(name, zpool_get_name(zhp)) == 0) return (1); return (0); } /* * Do we have a pool named *name? If so, return 1, otherwise 0. */ static int is_pool(char *name) { return (for_each_pool(0, NULL, B_TRUE, NULL, B_FALSE, is_pool_cb, name)); } /* Are all our argv[] strings pool names? If so return 1, 0 otherwise. */ static int are_all_pools(int argc, char **argv) { if ((argc == 0) || !*argv) return (0); while (--argc >= 0) if (!is_pool(argv[argc])) return (0); return (1); } /* * Helper function to print out vdev/pool names we can't resolve. Used for an * error message. */ static void error_list_unresolved_vdevs(int argc, char **argv, char *pool_name, iostat_cbdata_t *cb) { int i; char *name; char *str; for (i = 0; i < argc; i++) { name = argv[i]; if (is_pool(name)) str = gettext("pool"); else if (are_vdevs_in_pool(1, &name, pool_name, cb)) str = gettext("vdev in this pool"); else if (are_vdevs_in_pool(1, &name, NULL, cb)) str = gettext("vdev in another pool"); else str = gettext("unknown"); fprintf(stderr, "\t%s (%s)\n", name, str); } } /* * Same as get_interval_count(), but with additional checks to not misinterpret * guids as interval/count values. Assumes VDEV_NAME_GUID is set in * cb.cb_name_flags. 
*/
static void
get_interval_count_filter_guids(int *argc, char **argv, float *interval,
    unsigned long *count, iostat_cbdata_t *cb)
{
	char **tmpargv = argv;
	int argc_for_interval = 0;

	/* Is the last arg an interval value?  Or a guid? */
	if (*argc >= 1 && !are_vdevs_in_pool(1, &argv[*argc - 1], NULL, cb)) {
		/*
		 * The last arg is not a guid, so it's probably an
		 * interval value.
		 */
		argc_for_interval++;

		if (*argc >= 2 &&
		    !are_vdevs_in_pool(1, &argv[*argc - 2], NULL, cb)) {
			/*
			 * The 2nd to last arg is not a guid either, so the
			 * last two args are probably "interval count".
			 */
			argc_for_interval++;
		}
	}

	/*
	 * Point to our list of possible intervals: the trailing
	 * argc_for_interval entries of argv.  The caller's *argc is reduced
	 * by that amount before get_interval_count() is called on the tail.
	 */
	tmpargv = &argv[*argc - argc_for_interval];

	*argc = *argc - argc_for_interval;
	get_interval_count(&argc_for_interval, tmpargv,
	    interval, count);
}

-/*
- * Floating point sleep().  Allows you to pass in a floating point value for
- * seconds.
- */
-static void
-fsleep(float sec)
-{
-	struct timespec req;
-	req.tv_sec = floor(sec);
-	req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC;
-	nanosleep(&req, NULL);
-}
-
/*
 * Terminal height, in rows.  Returns -1 if stdout is not connected to a TTY or
 * if we were unable to determine its size (the TIOCGWINSZ ioctl failed or
 * reported zero rows).
 */
static int
terminal_height(void)
{
	struct winsize win;

	if (isatty(STDOUT_FILENO) == 0)
		return (-1);

	if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) != -1 && win.ws_row > 0)
		return (win.ws_row);

	return (-1);
}

/*
 * Run one of the zpool status/iostat -c scripts with the help (-h) option and
 * print the result.
 *
 * name: Short name of the script ('iostat').
* path: Full path to the script ('/usr/local/etc/zfs/zpool.d/iostat'); */ static void print_zpool_script_help(char *name, char *path) { char *argv[] = {path, "-h", NULL}; char **lines = NULL; int lines_cnt = 0; int rc; rc = libzfs_run_process_get_stdout_nopath(path, argv, NULL, &lines, &lines_cnt); if (rc != 0 || lines == NULL || lines_cnt <= 0) { if (lines != NULL) libzfs_free_str_array(lines, lines_cnt); return; } for (int i = 0; i < lines_cnt; i++) if (!is_blank_str(lines[i])) printf(" %-14s %s\n", name, lines[i]); libzfs_free_str_array(lines, lines_cnt); } /* * Go though the zpool status/iostat -c scripts in the user's path, run their * help option (-h), and print out the results. */ static void print_zpool_dir_scripts(char *dirpath) { DIR *dir; struct dirent *ent; char fullpath[MAXPATHLEN]; struct stat dir_stat; if ((dir = opendir(dirpath)) != NULL) { /* print all the files and directories within directory */ while ((ent = readdir(dir)) != NULL) { if (snprintf(fullpath, sizeof (fullpath), "%s/%s", dirpath, ent->d_name) >= sizeof (fullpath)) { (void) fprintf(stderr, gettext("internal error: " "ZPOOL_SCRIPTS_PATH too large.\n")); exit(1); } /* Print the scripts */ if (stat(fullpath, &dir_stat) == 0) if (dir_stat.st_mode & S_IXUSR && S_ISREG(dir_stat.st_mode)) print_zpool_script_help(ent->d_name, fullpath); } closedir(dir); } } /* * Print out help text for all zpool status/iostat -c scripts. */ static void print_zpool_script_list(char *subcommand) { char *dir, *sp; printf(gettext("Available 'zpool %s -c' commands:\n"), subcommand); sp = zpool_get_cmd_search_path(); if (sp == NULL) return; dir = strtok(sp, ":"); while (dir != NULL) { print_zpool_dir_scripts(dir); dir = strtok(NULL, ":"); } free(sp); } /* * Set the minimum pool/vdev name column width. The width must be at least 10, * but may be as large as the column width - 42 so it still fits on one line. 
* NOTE: 42 is the width of the default capacity/operations/bandwidth output
 */
static int
get_namewidth_iostat(zpool_handle_t *zhp, void *data)
{
	iostat_cbdata_t *cbd = data;
	int longest, room;

	/*
	 * Widest name among the pool/vdev/device lines that are going to be
	 * printed, as reported by get_namewidth().
	 */
	longest = get_namewidth(zhp, cbd->cb_namewidth,
	    cbd->cb_name_flags | VDEV_NAME_TYPE_ID, cbd->cb_verbose);

	/*
	 * The statistics occupy 42 characters, so this is what is left of the
	 * screen for the name column (header width and padding width).
	 */
	room = get_columns() - 42;

	/*
	 * Pad every line to the widest name only while that still fits on
	 * one screen line.  Otherwise cap the padding so that only the lines
	 * with long names wrap, not every line.
	 */
	if (longest > room)
		longest = room;

	/*
	 * get_columns() may report 0 (unknown width) or fewer than 42 columns
	 * on a narrow terminal, so never go below 10.
	 */
	cbd->cb_namewidth = (longest < 10) ? 10 : longest;

	return (0);
}

/*
 * zpool iostat [[-c [script1,script2,...]] [-lq]|[-rw]] [-ghHLpPvy] [-n name]
 *              [-T d|u] [[ pool ...]|[pool vdev ...]|[vdev ...]]
 *              [interval [count]]
 *
 *	-c CMD  For each vdev, run command CMD
 *	-g	Display guid for individual vdev name.
 *	-L	Follow links when resolving vdev path name.
 *	-P	Display full path for vdev name.
 *	-v	Display statistics for individual vdevs
 *	-h	Display help
 *	-p	Display values in parsable (exact) format.
 *	-H	Scripted mode.  Don't display headers, and separate properties
 *		by a single tab.
* -l Display average latency * -q Display queue depths * -w Display latency histograms * -r Display request size histogram * -T Display a timestamp in date(1) or Unix format * -n Only print headers once * * This command can be tricky because we want to be able to deal with pool * creation/destruction as well as vdev configuration changes. The bulk of this * processing is handled by the pool_list_* routines in zpool_iter.c. We rely * on pool_list_update() to detect the addition of new pools. Configuration * changes are all handled within libzfs. */ int zpool_do_iostat(int argc, char **argv) { int c; int ret; int npools; float interval = 0; unsigned long count = 0; int winheight = 24; zpool_list_t *list; boolean_t verbose = B_FALSE; boolean_t latency = B_FALSE, l_histo = B_FALSE, rq_histo = B_FALSE; boolean_t queues = B_FALSE, parsable = B_FALSE, scripted = B_FALSE; boolean_t omit_since_boot = B_FALSE; boolean_t guid = B_FALSE; boolean_t follow_links = B_FALSE; boolean_t full_name = B_FALSE; boolean_t headers_once = B_FALSE; iostat_cbdata_t cb = { 0 }; char *cmd = NULL; /* Used for printing error message */ const char flag_to_arg[] = {[IOS_LATENCY] = 'l', [IOS_QUEUES] = 'q', [IOS_L_HISTO] = 'w', [IOS_RQ_HISTO] = 'r'}; uint64_t unsupported_flags; /* check options */ while ((c = getopt(argc, argv, "c:gLPT:vyhplqrwnH")) != -1) { switch (c) { case 'c': if (cmd != NULL) { fprintf(stderr, gettext("Can't set -c flag twice\n")); exit(1); } if (getenv("ZPOOL_SCRIPTS_ENABLED") != NULL && !libzfs_envvar_is_set("ZPOOL_SCRIPTS_ENABLED")) { fprintf(stderr, gettext( "Can't run -c, disabled by " "ZPOOL_SCRIPTS_ENABLED.\n")); exit(1); } if ((getuid() <= 0 || geteuid() <= 0) && !libzfs_envvar_is_set("ZPOOL_SCRIPTS_AS_ROOT")) { fprintf(stderr, gettext( "Can't run -c with root privileges " "unless ZPOOL_SCRIPTS_AS_ROOT is set.\n")); exit(1); } cmd = optarg; verbose = B_TRUE; break; case 'g': guid = B_TRUE; break; case 'L': follow_links = B_TRUE; break; case 'P': full_name = B_TRUE; 
break; case 'T': get_timestamp_arg(*optarg); break; case 'v': verbose = B_TRUE; break; case 'p': parsable = B_TRUE; break; case 'l': latency = B_TRUE; break; case 'q': queues = B_TRUE; break; case 'H': scripted = B_TRUE; break; case 'w': l_histo = B_TRUE; break; case 'r': rq_histo = B_TRUE; break; case 'y': omit_since_boot = B_TRUE; break; case 'n': headers_once = B_TRUE; break; case 'h': usage(B_FALSE); break; case '?': if (optopt == 'c') { print_zpool_script_list("iostat"); exit(0); } else { fprintf(stderr, gettext("invalid option '%c'\n"), optopt); } usage(B_FALSE); } } argc -= optind; argv += optind; cb.cb_literal = parsable; cb.cb_scripted = scripted; if (guid) cb.cb_name_flags |= VDEV_NAME_GUID; if (follow_links) cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; if (full_name) cb.cb_name_flags |= VDEV_NAME_PATH; cb.cb_iteration = 0; cb.cb_namewidth = 0; cb.cb_verbose = verbose; /* Get our interval and count values (if any) */ if (guid) { get_interval_count_filter_guids(&argc, argv, &interval, &count, &cb); } else { get_interval_count(&argc, argv, &interval, &count); } if (argc == 0) { /* No args, so just print the defaults. 
*/ } else if (are_all_pools(argc, argv)) { /* All the args are pool names */ } else if (are_vdevs_in_pool(argc, argv, NULL, &cb)) { /* All the args are vdevs */ cb.cb_vdev_names = argv; cb.cb_vdev_names_count = argc; argc = 0; /* No pools to process */ } else if (are_all_pools(1, argv)) { /* The first arg is a pool name */ if (are_vdevs_in_pool(argc - 1, argv + 1, argv[0], &cb)) { /* ...and the rest are vdev names */ cb.cb_vdev_names = argv + 1; cb.cb_vdev_names_count = argc - 1; argc = 1; /* One pool to process */ } else { fprintf(stderr, gettext("Expected either a list of ")); fprintf(stderr, gettext("pools, or list of vdevs in")); fprintf(stderr, " \"%s\", ", argv[0]); fprintf(stderr, gettext("but got:\n")); error_list_unresolved_vdevs(argc - 1, argv + 1, argv[0], &cb); fprintf(stderr, "\n"); usage(B_FALSE); return (1); } } else { /* * The args don't make sense. The first arg isn't a pool name, * nor are all the args vdevs. */ fprintf(stderr, gettext("Unable to parse pools/vdevs list.\n")); fprintf(stderr, "\n"); return (1); } if (cb.cb_vdev_names_count != 0) { /* * If user specified vdevs, it implies verbose. */ cb.cb_verbose = B_TRUE; } /* * Construct the list of all interesting pools. */ ret = 0; if ((list = pool_list_get(argc, argv, NULL, parsable, &ret)) == NULL) return (1); if (pool_list_count(list) == 0 && argc != 0) { pool_list_free(list); return (1); } if (pool_list_count(list) == 0 && interval == 0) { pool_list_free(list); (void) fprintf(stderr, gettext("no pools available\n")); return (1); } if ((l_histo || rq_histo) && (cmd != NULL || latency || queues)) { pool_list_free(list); (void) fprintf(stderr, gettext("[-r|-w] isn't allowed with [-c|-l|-q]\n")); usage(B_FALSE); return (1); } if (l_histo && rq_histo) { pool_list_free(list); (void) fprintf(stderr, gettext("Only one of [-r|-w] can be passed at a time\n")); usage(B_FALSE); return (1); } /* * Enter the main iostat loop. 
*/ cb.cb_list = list; if (l_histo) { /* * Histograms tables look out of place when you try to display * them with the other stats, so make a rule that you can only * print histograms by themselves. */ cb.cb_flags = IOS_L_HISTO_M; } else if (rq_histo) { cb.cb_flags = IOS_RQ_HISTO_M; } else { cb.cb_flags = IOS_DEFAULT_M; if (latency) cb.cb_flags |= IOS_LATENCY_M; if (queues) cb.cb_flags |= IOS_QUEUES_M; } /* * See if the module supports all the stats we want to display. */ unsupported_flags = cb.cb_flags & ~get_stat_flags(list); if (unsupported_flags) { uint64_t f; int idx; fprintf(stderr, gettext("The loaded zfs module doesn't support:")); /* for each bit set in unsupported_flags */ for (f = unsupported_flags; f; f &= ~(1ULL << idx)) { idx = lowbit64(f) - 1; fprintf(stderr, " -%c", flag_to_arg[idx]); } fprintf(stderr, ". Try running a newer module.\n"); pool_list_free(list); return (1); } for (;;) { if ((npools = pool_list_count(list)) == 0) (void) fprintf(stderr, gettext("no pools available\n")); else { /* * If this is the first iteration and -y was supplied * we skip any printing. */ boolean_t skip = (omit_since_boot && cb.cb_iteration == 0); /* * Refresh all statistics. This is done as an * explicit step before calculating the maximum name * width, so that any * configuration changes are * properly accounted for. */ (void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb); /* * Iterate over all pools to determine the maximum width * for the pool / device name column across all pools. */ cb.cb_namewidth = 0; (void) pool_list_iter(list, B_FALSE, get_namewidth_iostat, &cb); if (timestamp_fmt != NODATE) print_timestamp(timestamp_fmt); if (cmd != NULL && cb.cb_verbose && !(cb.cb_flags & IOS_ANYHISTO_M)) { cb.vcdl = all_pools_for_each_vdev_run(argc, argv, cmd, g_zfs, cb.cb_vdev_names, cb.cb_vdev_names_count, cb.cb_name_flags); } else { cb.vcdl = NULL; } /* * Check terminal size so we can print headers * even when terminal window has its height * changed. 
*/ winheight = terminal_height(); /* * Are we connected to TTY? If not, headers_once * should be true, to avoid breaking scripts. */ if (winheight < 0) headers_once = B_TRUE; /* * If it's the first time and we're not skipping it, * or either skip or verbose mode, print the header. * * The histogram code explicitly prints its header on * every vdev, so skip this for histograms. */ if (((++cb.cb_iteration == 1 && !skip) || (skip != verbose) || (!headers_once && (cb.cb_iteration % winheight) == 0)) && (!(cb.cb_flags & IOS_ANYHISTO_M)) && !cb.cb_scripted) print_iostat_header(&cb); if (skip) { (void) fsleep(interval); continue; } pool_list_iter(list, B_FALSE, print_iostat, &cb); /* * If there's more than one pool, and we're not in * verbose mode (which prints a separator for us), * then print a separator. * * In addition, if we're printing specific vdevs then * we also want an ending separator. */ if (((npools > 1 && !verbose && !(cb.cb_flags & IOS_ANYHISTO_M)) || (!(cb.cb_flags & IOS_ANYHISTO_M) && cb.cb_vdev_names_count)) && !cb.cb_scripted) { print_iostat_separator(&cb); if (cb.vcdl != NULL) print_cmd_columns(cb.vcdl, 1); printf("\n"); } if (cb.vcdl != NULL) free_vdev_cmd_data_list(cb.vcdl); } /* * Flush the output so that redirection to a file isn't buffered * indefinitely. */ (void) fflush(stdout); if (interval == 0) break; if (count != 0 && --count == 0) break; (void) fsleep(interval); } pool_list_free(list); return (ret); } typedef struct list_cbdata { boolean_t cb_verbose; int cb_name_flags; int cb_namewidth; boolean_t cb_scripted; zprop_list_t *cb_proplist; boolean_t cb_literal; } list_cbdata_t; /* * Given a list of columns to display, output appropriate headers for each one. 
*/ static void print_header(list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; char headerbuf[ZPOOL_MAXPROPLEN]; const char *header; boolean_t first = B_TRUE; boolean_t right_justify; size_t width = 0; for (; pl != NULL; pl = pl->pl_next) { width = pl->pl_width; if (first && cb->cb_verbose) { /* * Reset the width to accommodate the verbose listing * of devices. */ width = cb->cb_namewidth; } if (!first) (void) printf(" "); else first = B_FALSE; right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { header = zpool_prop_column_name(pl->pl_prop); right_justify = zpool_prop_align_right(pl->pl_prop); } else { int i; for (i = 0; pl->pl_user_prop[i] != '\0'; i++) headerbuf[i] = toupper(pl->pl_user_prop[i]); headerbuf[i] = '\0'; header = headerbuf; } if (pl->pl_next == NULL && !right_justify) (void) printf("%s", header); else if (right_justify) (void) printf("%*s", (int)width, header); else (void) printf("%-*s", (int)width, header); } (void) printf("\n"); } /* * Given a pool and a list of properties, print out all the properties according * to the described layout. Used by zpool_do_list(). */ static void print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; char property[ZPOOL_MAXPROPLEN]; char *propstr; boolean_t right_justify; size_t width; for (; pl != NULL; pl = pl->pl_next) { width = pl->pl_width; if (first && cb->cb_verbose) { /* * Reset the width to accommodate the verbose listing * of devices. 
*/ width = cb->cb_namewidth; } if (!first) { if (cb->cb_scripted) (void) printf("\t"); else (void) printf(" "); } else { first = B_FALSE; } right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { if (zpool_get_prop(zhp, pl->pl_prop, property, sizeof (property), NULL, cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = zpool_prop_align_right(pl->pl_prop); } else if ((zpool_prop_feature(pl->pl_user_prop) || zpool_prop_unsupported(pl->pl_user_prop)) && zpool_prop_get_feature(zhp, pl->pl_user_prop, property, sizeof (property)) == 0) { propstr = property; } else { propstr = "-"; } /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. */ if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) (void) printf("%*s", (int)width, propstr); else (void) printf("%-*s", (int)width, propstr); } (void) printf("\n"); } static void print_one_column(zpool_prop_t prop, uint64_t value, const char *str, boolean_t scripted, boolean_t valid, enum zfs_nicenum_format format) { char propval[64]; boolean_t fixed; size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL); switch (prop) { case ZPOOL_PROP_SIZE: case ZPOOL_PROP_EXPANDSZ: case ZPOOL_PROP_CHECKPOINT: case ZPOOL_PROP_DEDUPRATIO: if (value == 0) (void) strlcpy(propval, "-", sizeof (propval)); else zfs_nicenum_format(value, propval, sizeof (propval), format); break; case ZPOOL_PROP_FRAGMENTATION: if (value == ZFS_FRAG_INVALID) { (void) strlcpy(propval, "-", sizeof (propval)); } else if (format == ZFS_NICENUM_RAW) { (void) snprintf(propval, sizeof (propval), "%llu", (unsigned long long)value); } else { (void) snprintf(propval, sizeof (propval), "%llu%%", (unsigned long long)value); } break; case ZPOOL_PROP_CAPACITY: /* capacity value is in parts-per-10,000 (aka permyriad) */ if (format == ZFS_NICENUM_RAW) (void) snprintf(propval, sizeof (propval), "%llu", 
(unsigned long long)value / 100); else (void) snprintf(propval, sizeof (propval), value < 1000 ? "%1.2f%%" : value < 10000 ? "%2.1f%%" : "%3.0f%%", value / 100.0); break; case ZPOOL_PROP_HEALTH: width = 8; (void) strlcpy(propval, str, sizeof (propval)); break; default: zfs_nicenum_format(value, propval, sizeof (propval), format); } if (!valid) (void) strlcpy(propval, "-", sizeof (propval)); if (scripted) (void) printf("\t%s", propval); else (void) printf(" %*s", (int)width, propval); } /* * print static default line per vdev * not compatible with '-o' option */ static void print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, list_cbdata_t *cb, int depth, boolean_t isspare) { nvlist_t **child; vdev_stat_t *vs; uint_t c, children; char *vname; boolean_t scripted = cb->cb_scripted; uint64_t islog = B_FALSE; char *dashes = "%-*s - - - - " "- - - - -\n"; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); if (name != NULL) { boolean_t toplevel = (vs->vs_space != 0); uint64_t cap; enum zfs_nicenum_format format; const char *state; if (cb->cb_literal) format = ZFS_NICENUM_RAW; else format = ZFS_NICENUM_1024; if (strcmp(name, VDEV_TYPE_INDIRECT) == 0) return; if (scripted) (void) printf("\t%s", name); else if (strlen(name) + depth > cb->cb_namewidth) (void) printf("%*s%s", depth, "", name); else (void) printf("%*s%s%*s", depth, "", name, (int)(cb->cb_namewidth - strlen(name) - depth), ""); /* * Print the properties for the individual vdevs. Some * properties are only applicable to toplevel vdevs. The * 'toplevel' boolean value is passed to the print_one_column() * to indicate that the value is valid. 
*/ if (vs->vs_pspace) print_one_column(ZPOOL_PROP_SIZE, vs->vs_pspace, NULL, scripted, B_TRUE, format); else print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, NULL, scripted, toplevel, format); print_one_column(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, NULL, scripted, toplevel, format); print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, NULL, scripted, toplevel, format); print_one_column(ZPOOL_PROP_CHECKPOINT, vs->vs_checkpoint_space, NULL, scripted, toplevel, format); print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, NULL, scripted, B_TRUE, format); print_one_column(ZPOOL_PROP_FRAGMENTATION, vs->vs_fragmentation, NULL, scripted, (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel), format); cap = (vs->vs_space == 0) ? 0 : (vs->vs_alloc * 10000 / vs->vs_space); print_one_column(ZPOOL_PROP_CAPACITY, cap, NULL, scripted, toplevel, format); print_one_column(ZPOOL_PROP_DEDUPRATIO, 0, NULL, scripted, toplevel, format); state = zpool_state_to_name(vs->vs_state, vs->vs_aux); if (isspare) { if (vs->vs_aux == VDEV_AUX_SPARED) state = "INUSE"; else if (vs->vs_state == VDEV_STATE_HEALTHY) state = "AVAIL"; } print_one_column(ZPOOL_PROP_HEALTH, 0, state, scripted, B_TRUE, format); (void) printf("\n"); } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; /* list the normal vdevs first */ for (c = 0; c < children; c++) { uint64_t ishole = B_FALSE; if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole) continue; if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &islog) == 0 && islog) continue; if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS)) continue; vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); print_list_stats(zhp, vname, child[c], cb, depth + 2, B_FALSE); free(vname); } /* list the classes: 'logs', 'dedup', and 'special' */ for (uint_t n = 0; n < 3; n++) { boolean_t printed = B_FALSE; for (c = 0; c < children; c++) { char *bias = NULL; char *type = 
NULL; if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &islog) == 0 && islog) { bias = VDEV_ALLOC_CLASS_LOGS; } else { (void) nvlist_lookup_string(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS, &bias); (void) nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type); } if (bias == NULL || strcmp(bias, class_name[n]) != 0) continue; if (!islog && strcmp(type, VDEV_TYPE_INDIRECT) == 0) continue; if (!printed) { /* LINTED E_SEC_PRINTF_VAR_FMT */ (void) printf(dashes, cb->cb_namewidth, class_name[n]); printed = B_TRUE; } vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); print_list_stats(zhp, vname, child[c], cb, depth + 2, B_FALSE); free(vname); } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0 && children > 0) { /* LINTED E_SEC_PRINTF_VAR_FMT */ (void) printf(dashes, cb->cb_namewidth, "cache"); for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags); print_list_stats(zhp, vname, child[c], cb, depth + 2, B_FALSE); free(vname); } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0 && children > 0) { /* LINTED E_SEC_PRINTF_VAR_FMT */ (void) printf(dashes, cb->cb_namewidth, "spare"); for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags); print_list_stats(zhp, vname, child[c], cb, depth + 2, B_TRUE); free(vname); } } } /* * Generic callback function to list a pool. */ static int list_callback(zpool_handle_t *zhp, void *data) { list_cbdata_t *cbp = data; print_pool(zhp, cbp); if (cbp->cb_verbose) { nvlist_t *config, *nvroot; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); print_list_stats(zhp, NULL, nvroot, cbp, 0, B_FALSE); } return (0); } /* * Set the minimum pool/vdev name column width. The width must be at least 9, * but may be as large as needed. 
*/
static int
get_namewidth_list(zpool_handle_t *zhp, void *data)
{
	list_cbdata_t *cb = data;
	int width;

	width = get_namewidth(zhp, cb->cb_namewidth,
	    cb->cb_name_flags | VDEV_NAME_TYPE_ID, cb->cb_verbose);

	if (width < 9)
		width = 9;

	/* Save the (possibly raised) width for the printing callbacks */
	cb->cb_namewidth = width;

	return (0);
}

/*
 * zpool list [-gHLpPv] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]]
 *
 *	-g	Display guid for individual vdev name.
 *	-H	Scripted mode.  Don't display headers, and separate properties
 *		by a single tab.
 *	-L	Follow links when resolving vdev path name.
 *	-o	List of properties to display.  Defaults to
 *		"name,size,allocated,free,checkpoint,expandsize,fragmentation,"
 *		"capacity,dedupratio,health,altroot"
 *	-p	Display values in parsable (exact) format.
 *	-P	Display full path for vdev name.
 *	-T	Display a timestamp in date(1) or Unix format
 *	-v	Verbose: also print a line per vdev (see list_callback()).
 *
 * List all pools in the system, whether or not they're healthy.  Output space
 * statistics for each one, as well as health status summary.
 */
int
zpool_do_list(int argc, char **argv)
{
	int c;
	int ret = 0;
	list_cbdata_t cb = { 0 };
	static char default_props[] =
	    "name,size,allocated,free,checkpoint,expandsize,fragmentation,"
	    "capacity,dedupratio,health,altroot";
	char *props = default_props;
	float interval = 0;
	unsigned long count = 0;
	zpool_list_t *list;
	boolean_t first = B_TRUE;

	/* check options */
	while ((c = getopt(argc, argv, ":gHLo:pPT:v")) != -1) {
		switch (c) {
		case 'g':
			cb.cb_name_flags |= VDEV_NAME_GUID;
			break;
		case 'H':
			cb.cb_scripted = B_TRUE;
			break;
		case 'L':
			cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS;
			break;
		case 'o':
			props = optarg;
			break;
		case 'P':
			cb.cb_name_flags |= VDEV_NAME_PATH;
			break;
		case 'p':
			cb.cb_literal = B_TRUE;
			break;
		case 'T':
			get_timestamp_arg(*optarg);
			break;
		case 'v':
			cb.cb_verbose = B_TRUE;
			cb.cb_namewidth = 8;	/* 8 until precalc is avail */
			break;
		case ':':
			(void) fprintf(stderr, gettext("missing argument for "
			    "'%c' option\n"), optopt);
			usage(B_FALSE);
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	/* Strip a trailing "interval [count]" from the operands */
	get_interval_count(&argc, argv, &interval, &count);

	if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0)
		usage(B_FALSE);

	/* The pool list is rebuilt on every pass of the loop */
	for (;;) {
		/*
		 * NOTE(review): this early return does not call
		 * zprop_free_list(cb.cb_proplist) - confirm whether that
		 * matters for this short-lived command.
		 */
		if ((list = pool_list_get(argc, argv, &cb.cb_proplist,
		    cb.cb_literal, &ret)) == NULL)
			return (1);

		if (pool_list_count(list) == 0)
			break;

		cb.cb_namewidth = 0;
		(void) pool_list_iter(list, B_FALSE, get_namewidth_list, &cb);

		if (timestamp_fmt != NODATE)
			print_timestamp(timestamp_fmt);

		/* Header once, or on every pass in verbose mode */
		if (!cb.cb_scripted && (first || cb.cb_verbose)) {
			print_header(&cb);
			first = B_FALSE;
		}
		ret = pool_list_iter(list, B_TRUE, list_callback, &cb);

		if (interval == 0)
			break;

		if (count != 0 && --count == 0)
			break;

		pool_list_free(list);
		(void) fsleep(interval);
	}

	/* Only say "no pools" when no pool names were given */
	if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) {
		(void) printf(gettext("no pools available\n"));
		ret = 0;
	}

	pool_list_free(list);
	zprop_free_list(cb.cb_proplist);
	return (ret);
}

/*
 * Shared implementation of 'zpool attach' and 'zpool replace'.
 *
 * Operands are <pool> <device> [<new-device>]: argv[0] is the pool, argv[1]
 * the existing device and argv[2] the new one.  <new-device> may be omitted
 * only when 'replacing' is set, in which case <device> is used for both.
 * The only property accepted by -o is the one named by ZPOOL_CONFIG_ASHIFT.
 *
 * Returns 1 when the pool cannot be opened, has no config, or the new vdev
 * tree cannot be built; otherwise the result of zpool_vdev_attach(), or of
 * zpool_wait() when -w was given and the attach succeeded.
 */
static int
zpool_do_attach_or_replace(int argc, char **argv, int replacing)
{
	boolean_t force = B_FALSE;
	boolean_t rebuild = B_FALSE;
	boolean_t wait = B_FALSE;
	int c;
	nvlist_t *nvroot;
	char *poolname, *old_disk, *new_disk;
	zpool_handle_t *zhp;
	nvlist_t *props = NULL;
	char *propval;
	int ret;

	/* check options */
	while ((c = getopt(argc, argv, "fo:sw")) != -1) {
		switch (c) {
		case 'f':
			force = B_TRUE;
			break;
		case 'o':
			if ((propval = strchr(optarg, '=')) == NULL) {
				(void) fprintf(stderr, gettext("missing "
				    "'=' for -o option\n"));
				usage(B_FALSE);
			}
			/* Split "name=value" in place at the '=' */
			*propval = '\0';
			propval++;

			if ((strcmp(optarg, ZPOOL_CONFIG_ASHIFT) != 0) ||
			    (add_prop_list(optarg, propval, &props, B_TRUE)))
				usage(B_FALSE);
			break;
		case 's':
			rebuild = B_TRUE;
			break;
		case 'w':
			wait = B_TRUE;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	/* get pool name and check number of arguments */
	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name argument\n"));
		usage(B_FALSE);
	}

	poolname = argv[0];

	if (argc < 2) {
		(void) fprintf(stderr,
		    gettext("missing specification\n"));
		usage(B_FALSE);
	}

	old_disk = argv[1];

	/*
	 * After this block argv[0] is the new device and argc counts the
	 * operands from there on, which is what make_root_vdev() is given.
	 */
	if (argc < 3) {
		if (!replacing) {
			(void) fprintf(stderr,
			    gettext("missing specification\n"));
			usage(B_FALSE);
		}
		new_disk = old_disk;
		argc -= 1;
		argv += 1;
	} else {
		new_disk = argv[2];
		argc -= 2;
		argv += 2;
	}

	if (argc > 1) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	if ((zhp = zpool_open(g_zfs, poolname)) == NULL) {
		nvlist_free(props);
		return (1);
	}

	if (zpool_get_config(zhp, NULL) == NULL) {
		(void) fprintf(stderr, gettext("pool '%s' is unavailable\n"),
		    poolname);
		zpool_close(zhp);
		nvlist_free(props);
		return (1);
	}

	/* unless manually specified use "ashift" pool property (if set) */
	if (!nvlist_exists(props, ZPOOL_CONFIG_ASHIFT)) {
		int intval;
		zprop_source_t src;
		char strval[ZPOOL_MAXPROPLEN];

		intval = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &src);
		if (src != ZPROP_SRC_DEFAULT) {
			(void) sprintf(strval, "%" PRId32, intval);
			verify(add_prop_list(ZPOOL_CONFIG_ASHIFT, strval,
			    &props, B_TRUE) == 0);
		}
	}

	nvroot = make_root_vdev(zhp, props, force, B_FALSE, replacing, B_FALSE,
	    argc, argv);
	if (nvroot == NULL) {
		zpool_close(zhp);
		nvlist_free(props);
		return (1);
	}

	ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing,
	    rebuild);

	/* -w: block until the matching activity has finished */
	if (ret == 0 && wait)
		ret = zpool_wait(zhp,
		    replacing ? ZPOOL_WAIT_REPLACE : ZPOOL_WAIT_RESILVER);

	nvlist_free(props);
	nvlist_free(nvroot);
	zpool_close(zhp);

	return (ret);
}

/*
 * zpool replace [-fsw] [-o property=value] <pool> <device> [<new-device>]
 *
 *	-f	Force attach, even if <new-device> appears to be in use.
 *	-s	Use sequential instead of healing reconstruction for resilver.
 *	-o	Set property=value.
 *	-w	Wait for replacing to complete before returning
 *
 * Replace <device> with <new-device>.
 */
/* ARGSUSED */
int
zpool_do_replace(int argc, char **argv)
{
	return (zpool_do_attach_or_replace(argc, argv, B_TRUE));
}

/*
 * zpool attach [-fsw] [-o property=value]
 *
 *	-f	Force attach, even if appears to be in use.
* -s Use sequential instead of healing reconstruction for resilver. * -o Set property=value. * -w Wait for resilvering to complete before returning * * Attach to the mirror containing . If is not * part of a mirror, then will be transformed into a mirror of * and . In either case, will begin life * with a DTL of [0, now], and will immediately begin to resilver itself. */ int zpool_do_attach(int argc, char **argv) { return (zpool_do_attach_or_replace(argc, argv, B_FALSE)); } /* * zpool detach [-f] * * -f Force detach of , even if DTLs argue against it * (not supported yet) * * Detach a device from a mirror. The operation will be refused if * is the last device in the mirror, or if the DTLs indicate that this device * has the only valid copy of some data. */ /* ARGSUSED */ int zpool_do_detach(int argc, char **argv) { int c; char *poolname, *path; zpool_handle_t *zhp; int ret; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { switch (c) { case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing specification\n")); usage(B_FALSE); } poolname = argv[0]; path = argv[1]; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); ret = zpool_vdev_detach(zhp, path); zpool_close(zhp); return (ret); } /* * zpool split [-gLnP] [-o prop=val] ... * [-o mntopt] ... * [-R altroot] [ ...] * * -g Display guid for individual vdev name. * -L Follow links when resolving vdev path name. * -n Do not split the pool, but display the resulting layout if * it were to be split. * -o Set property=value, or set mount options. * -P Display full path for vdev name. * -R Mount the split-off pool under an alternate root. * -l Load encryption keys while importing. 
*
 * Splits the named pool and gives it the new pool name.  Devices to be split
 * off may be listed, provided that no more than one device is specified
 * per top-level vdev mirror.  The newly split pool is left in an exported
 * state unless -R is specified.
 *
 * Restrictions: the top-level of the pool pool must only be made up of
 * mirrors; all devices in the pool must be healthy; no device may be
 * undergoing a resilvering operation.
 */
int
zpool_do_split(int argc, char **argv)
{
	char *srcpool, *newpool, *propval;
	char *mntopts = NULL;
	splitflags_t flags;
	int c, ret = 0;
	boolean_t loadkeys = B_FALSE;
	zpool_handle_t *zhp;
	nvlist_t *config, *props = NULL;

	flags.dryrun = B_FALSE;
	flags.import = B_FALSE;
	flags.name_flags = 0;

	/* check options */
	while ((c = getopt(argc, argv, ":gLR:lno:P")) != -1) {
		switch (c) {
		case 'g':
			flags.name_flags |= VDEV_NAME_GUID;
			break;
		case 'L':
			flags.name_flags |= VDEV_NAME_FOLLOW_LINKS;
			break;
		case 'R':
			/* -R both sets altroot and requests an import */
			flags.import = B_TRUE;
			if (add_prop_list(
			    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg,
			    &props, B_TRUE) != 0) {
				nvlist_free(props);
				usage(B_FALSE);
			}
			break;
		case 'l':
			loadkeys = B_TRUE;
			break;
		case 'n':
			flags.dryrun = B_TRUE;
			break;
		case 'o':
			/* "name=value" is a property, anything else mntopts */
			if ((propval = strchr(optarg, '=')) != NULL) {
				*propval = '\0';
				propval++;
				if (add_prop_list(optarg, propval,
				    &props, B_TRUE) != 0) {
					nvlist_free(props);
					usage(B_FALSE);
				}
			} else {
				mntopts = optarg;
			}
			break;
		case 'P':
			flags.name_flags |= VDEV_NAME_PATH;
			break;
		case ':':
			(void) fprintf(stderr, gettext("missing argument for "
			    "'%c' option\n"), optopt);
			usage(B_FALSE);
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
			break;
		}
	}

	/* Mount options and key loading only apply to an import (-R) */
	if (!flags.import && mntopts != NULL) {
		(void) fprintf(stderr, gettext("setting mntopts is only "
		    "valid when importing the pool\n"));
		usage(B_FALSE);
	}

	if (!flags.import && loadkeys) {
		(void) fprintf(stderr, gettext("loading keys is only "
		    "valid when importing the pool\n"));
		usage(B_FALSE);
	}

	argc -= optind;
	argv += optind;

	if (argc < 1) {
		(void) fprintf(stderr, gettext("Missing pool name\n"));
		usage(B_FALSE);
	}
	if (argc < 2) {
		(void) fprintf(stderr, gettext("Missing new pool name\n"));
		usage(B_FALSE);
	}

	srcpool = argv[0];
	newpool = argv[1];

	/* Whatever follows the two pool names is the device list */
	argc -= 2;
	argv += 2;

	if ((zhp = zpool_open(g_zfs, srcpool)) == NULL) {
		nvlist_free(props);
		return (1);
	}

	config = split_mirror_vdev(zhp, newpool, props, flags, argc, argv);
	if (config == NULL) {
		ret = 1;
	} else {
		if (flags.dryrun) {
			(void) printf(gettext("would create '%s' with the "
			    "following layout:\n\n"), newpool);
			print_vdev_tree(NULL, newpool, config, 0, "",
			    flags.name_flags);
			print_vdev_tree(NULL, "dedup", config, 0,
			    VDEV_ALLOC_BIAS_DEDUP, 0);
			print_vdev_tree(NULL, "special", config, 0,
			    VDEV_ALLOC_BIAS_SPECIAL, 0);
		}
	}

	zpool_close(zhp);

	/* Done unless a real split succeeded and an import was requested */
	if (ret != 0 || flags.dryrun || !flags.import) {
		nvlist_free(config);
		nvlist_free(props);
		return (ret);
	}

	/*
	 * The split was successful.  Now we need to open the new
	 * pool and import it.
	 */
	if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL) {
		nvlist_free(config);
		nvlist_free(props);
		return (1);
	}

	/* Any key-loading failure is reported as exit status 1 */
	if (loadkeys) {
		ret = zfs_crypto_attempt_load_keys(g_zfs, newpool);
		if (ret != 0)
			ret = 1;
	}

	if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
	    zpool_enable_datasets(zhp, mntopts, 0) != 0) {
		ret = 1;
		(void) fprintf(stderr, gettext("Split was successful, but "
		    "the datasets could not all be mounted\n"));
		(void) fprintf(stderr, gettext("Try doing '%s' with a "
		    "different altroot\n"), "zpool import");
	}
	zpool_close(zhp);
	nvlist_free(config);
	nvlist_free(props);

	return (ret);
}

-
+#define	POWER_OPT 1024

/*
- * zpool online ...
+ * zpool online [--power] ...
+ * + * --power: Power on the enclosure slot to the drive (if possible) */ int zpool_do_online(int argc, char **argv) { int c, i; char *poolname; zpool_handle_t *zhp; int ret = 0; vdev_state_t newstate; int flags = 0; + boolean_t is_power_on = B_FALSE; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "e")) != -1) { + while ((c = getopt_long(argc, argv, "e", long_options, NULL)) != -1) { switch (c) { case 'e': flags |= ZFS_ONLINE_EXPAND; break; + case POWER_OPT: + is_power_on = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } + if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT")) + is_power_on = B_TRUE; + argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing device name\n")); usage(B_FALSE); } poolname = argv[0]; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); for (i = 1; i < argc; i++) { vdev_state_t oldstate; boolean_t avail_spare, l2cache; + int rc; + + if (is_power_on) { + rc = zpool_power_on_and_disk_wait(zhp, argv[i]); + if (rc == ENOTSUP) { + (void) fprintf(stderr, + gettext("Power control not supported\n")); + } + if (rc != 0) + return (rc); + } + nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare, &l2cache, NULL); if (tgt == NULL) { ret = 1; continue; } uint_t vsc; oldstate = ((vdev_stat_t *)fnvlist_lookup_uint64_array(tgt, ZPOOL_CONFIG_VDEV_STATS, &vsc))->vs_state; if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) { if (newstate != VDEV_STATE_HEALTHY) { (void) printf(gettext("warning: device '%s' " "onlined, but remains in faulted state\n"), argv[i]); if (newstate == VDEV_STATE_FAULTED) (void) printf(gettext("use 'zpool " "clear' to restore a faulted " "device\n")); else (void) 
printf(gettext("use 'zpool " "replace' to replace devices " "that are no longer present\n")); if ((flags & ZFS_ONLINE_EXPAND)) { (void) printf(gettext("%s: failed " "to expand usable space on " "unhealthy device '%s'\n"), (oldstate >= VDEV_STATE_DEGRADED ? "error" : "warning"), argv[i]); if (oldstate >= VDEV_STATE_DEGRADED) { ret = 1; break; } } } } else { ret = 1; } } zpool_close(zhp); return (ret); } /* - * zpool offline [-ft] ... + * zpool offline [-ft]|[--power] ... + * * * -f Force the device into a faulted state. * * -t Only take the device off-line temporarily. The offline/faulted * state will not be persistent across reboots. + * + * --power Power off the enclosure slot to the drive (if possible) */ /* ARGSUSED */ int zpool_do_offline(int argc, char **argv) { int c, i; char *poolname; zpool_handle_t *zhp; int ret = 0; boolean_t istmp = B_FALSE; boolean_t fault = B_FALSE; + boolean_t is_power_off = B_FALSE; + + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "ft")) != -1) { + while ((c = getopt_long(argc, argv, "ft", long_options, NULL)) != -1) { switch (c) { case 'f': fault = B_TRUE; break; case 't': istmp = B_TRUE; break; + case POWER_OPT: + is_power_off = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } + if (is_power_off && fault) { + (void) fprintf(stderr, + gettext("-0 and -f cannot be used together\n")); + usage(B_FALSE); + return (1); + } + + if (is_power_off && istmp) { + (void) fprintf(stderr, + gettext("-0 and -t cannot be used together\n")); + usage(B_FALSE); + return (1); + } + argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing device name\n")); usage(B_FALSE); } poolname = argv[0]; if ((zhp = 
zpool_open(g_zfs, poolname)) == NULL) return (1); for (i = 1; i < argc; i++) { - if (fault) { - uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + if (is_power_off) { + /* + * Note: we have to power off first, then set REMOVED, + * or else zpool_vdev_set_removed_state() returns + * EAGAIN. + */ + ret = zpool_power_off(zhp, argv[i]); + if (ret != 0) { + (void) fprintf(stderr, "%s %s %d\n", + gettext("unable to power off slot for"), + argv[i], ret); + } + zpool_vdev_set_removed_state(zhp, guid, VDEV_AUX_NONE); + + } else if (fault) { vdev_aux_t aux; if (istmp == B_FALSE) { /* Force the fault to persist across imports */ aux = VDEV_AUX_EXTERNAL_PERSIST; } else { aux = VDEV_AUX_EXTERNAL; } if (guid == 0 || zpool_vdev_fault(zhp, guid, aux) != 0) ret = 1; } else { if (zpool_vdev_offline(zhp, argv[i], istmp) != 0) ret = 1; } } zpool_close(zhp); return (ret); } /* - * zpool clear [device] + * zpool clear [-nF]|[--power] [device] * * Clear all errors associated with a pool or a particular device. 
*/ int zpool_do_clear(int argc, char **argv) { int c; int ret = 0; boolean_t dryrun = B_FALSE; boolean_t do_rewind = B_FALSE; boolean_t xtreme_rewind = B_FALSE; + boolean_t is_power_on = B_FALSE; uint32_t rewind_policy = ZPOOL_NO_REWIND; nvlist_t *policy = NULL; zpool_handle_t *zhp; char *pool, *device; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "FnX")) != -1) { + while ((c = getopt_long(argc, argv, "FnX", long_options, + NULL)) != -1) { switch (c) { case 'F': do_rewind = B_TRUE; break; case 'n': dryrun = B_TRUE; break; case 'X': xtreme_rewind = B_TRUE; break; + case POWER_OPT: + is_power_on = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } + if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT")) + is_power_on = B_TRUE; + argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if ((dryrun || xtreme_rewind) && !do_rewind) { (void) fprintf(stderr, gettext("-n or -X only meaningful with -F\n")); usage(B_FALSE); } if (dryrun) rewind_policy = ZPOOL_TRY_REWIND; else if (do_rewind) rewind_policy = ZPOOL_DO_REWIND; if (xtreme_rewind) rewind_policy |= ZPOOL_EXTREME_REWIND; /* In future, further rewind policy choices can be passed along here */ if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 || nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind_policy) != 0) { return (1); } pool = argv[0]; device = argc == 2 ? 
argv[1] : NULL; if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { nvlist_free(policy); return (1); } + if (is_power_on) { + if (device == NULL) { + zpool_power_on_pool_and_wait_for_devices(zhp); + } else { + zpool_power_on_and_disk_wait(zhp, device); + } + } + if (zpool_clear(zhp, device, policy) != 0) ret = 1; zpool_close(zhp); nvlist_free(policy); return (ret); } /* * zpool reguid */ int zpool_do_reguid(int argc, char **argv) { int c; char *poolname; zpool_handle_t *zhp; int ret = 0; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { switch (c) { case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } poolname = argv[0]; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); ret = zpool_reguid(zhp); zpool_close(zhp); return (ret); } /* * zpool reopen * * Reopen the pool so that the kernel can update the sizes of all vdevs. 
*/ int zpool_do_reopen(int argc, char **argv) { int c; int ret = 0; boolean_t scrub_restart = B_TRUE; /* check options */ while ((c = getopt(argc, argv, "n")) != -1) { switch (c) { case 'n': scrub_restart = B_FALSE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* if argc == 0 we will execute zpool_reopen_one on all pools */ ret = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, zpool_reopen_one, &scrub_restart); return (ret); } typedef struct scrub_cbdata { int cb_type; pool_scrub_cmd_t cb_scrub_cmd; } scrub_cbdata_t; static boolean_t zpool_has_checkpoint(zpool_handle_t *zhp) { nvlist_t *config, *nvroot; config = zpool_get_config(zhp, NULL); if (config != NULL) { pool_checkpoint_stat_t *pcs = NULL; uint_t c; nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); if (pcs == NULL || pcs->pcs_state == CS_NONE) return (B_FALSE); assert(pcs->pcs_state == CS_CHECKPOINT_EXISTS || pcs->pcs_state == CS_CHECKPOINT_DISCARDING); return (B_TRUE); } return (B_FALSE); } static int scrub_callback(zpool_handle_t *zhp, void *data) { scrub_cbdata_t *cb = data; int err; /* * Ignore faulted pools. */ if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { (void) fprintf(stderr, gettext("cannot scan '%s': pool is " "currently unavailable\n"), zpool_get_name(zhp)); return (1); } err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd); if (err == 0 && zpool_has_checkpoint(zhp) && cb->cb_type == POOL_SCAN_SCRUB) { (void) printf(gettext("warning: will not scrub state that " "belongs to the checkpoint of pool '%s'\n"), zpool_get_name(zhp)); } return (err != 0); } static int wait_callback(zpool_handle_t *zhp, void *data) { zpool_wait_activity_t *act = data; return (zpool_wait(zhp, *act)); } /* * zpool scrub [-s | -p] [-w] ... * * -s Stop. Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. 
* -w Wait. Blocks until scrub has completed. */ int zpool_do_scrub(int argc, char **argv) { int c; scrub_cbdata_t cb; boolean_t wait = B_FALSE; int error; cb.cb_type = POOL_SCAN_SCRUB; cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; /* check options */ while ((c = getopt(argc, argv, "spw")) != -1) { switch (c) { case 's': cb.cb_type = POOL_SCAN_NONE; break; case 'p': cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; break; case 'w': wait = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (cb.cb_type == POOL_SCAN_NONE && cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) { (void) fprintf(stderr, gettext("invalid option combination: " "-s and -p are mutually exclusive\n")); usage(B_FALSE); } if (wait && (cb.cb_type == POOL_SCAN_NONE || cb.cb_scrub_cmd == POOL_SCRUB_PAUSE)) { (void) fprintf(stderr, gettext("invalid option combination: " "-w cannot be used with -p or -s\n")); usage(B_FALSE); } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } error = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, scrub_callback, &cb); if (wait && !error) { zpool_wait_activity_t act = ZPOOL_WAIT_SCRUB; error = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, wait_callback, &act); } return (error); } /* * zpool resilver ... * * Restarts any in-progress resilver */ int zpool_do_resilver(int argc, char **argv) { int c; scrub_cbdata_t cb; cb.cb_type = POOL_SCAN_RESILVER; cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { switch (c) { case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } return (for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, scrub_callback, &cb)); } /* * zpool trim [-d] [-r ] [-c | -s] [ ...] * * -c Cancel. Ends any in-progress trim. 
* -d Secure trim. Requires kernel and device support.
 * -r Sets the TRIM rate in bytes (per second). Supports
 * adding a multiplier suffix such as 'k' or 'm'.
 * -s Suspend. TRIM can then be restarted with no flags.
 * -w Wait. Blocks until trimming has completed.
 */
int
zpool_do_trim(int argc, char **argv)
{
	/* Every long option maps onto its single-letter equivalent. */
	struct option long_options[] = {
		{"cancel", no_argument, NULL, 'c'},
		{"secure", no_argument, NULL, 'd'},
		{"rate", required_argument, NULL, 'r'},
		{"suspend", no_argument, NULL, 's'},
		{"wait", no_argument, NULL, 'w'},
		{0, 0, 0, 0}
	};

	pool_trim_func_t cmd_type = POOL_TRIM_START;
	uint64_t rate = 0;
	boolean_t secure = B_FALSE;
	boolean_t wait = B_FALSE;

	int c;
	while ((c = getopt_long(argc, argv, "cdr:sw", long_options, NULL))
	    != -1) {
		switch (c) {
		case 'c':
			/* Repeating -c is accepted; mixing it with -s is not. */
			if (cmd_type != POOL_TRIM_START &&
			    cmd_type != POOL_TRIM_CANCEL) {
				(void) fprintf(stderr, gettext("-c cannot be "
				    "combined with other options\n"));
				usage(B_FALSE);
			}
			cmd_type = POOL_TRIM_CANCEL;
			break;
		case 'd':
			if (cmd_type != POOL_TRIM_START) {
				(void) fprintf(stderr, gettext("-d cannot be "
				    "combined with the -c or -s options\n"));
				usage(B_FALSE);
			}
			secure = B_TRUE;
			break;
		case 'r':
			if (cmd_type != POOL_TRIM_START) {
				(void) fprintf(stderr, gettext("-r cannot be "
				    "combined with the -c or -s options\n"));
				usage(B_FALSE);
			}
			if (zfs_nicestrtonum(NULL, optarg, &rate) == -1) {
				(void) fprintf(stderr,
				    gettext("invalid value for rate\n"));
				usage(B_FALSE);
			}
			break;
		case 's':
			/* Repeating -s is accepted; mixing it with -c is not. */
			if (cmd_type != POOL_TRIM_START &&
			    cmd_type != POOL_TRIM_SUSPEND) {
				(void) fprintf(stderr, gettext("-s cannot be "
				    "combined with other options\n"));
				usage(B_FALSE);
			}
			cmd_type = POOL_TRIM_SUSPEND;
			break;
		case 'w':
			wait = B_TRUE;
			break;
		case '?':
			/*
			 * optopt is non-zero for a bad short option; otherwise
			 * report the offending argument itself.
			 */
			if (optopt != 0) {
				(void) fprintf(stderr,
				    gettext("invalid option '%c'\n"), optopt);
			} else {
				(void) fprintf(stderr,
				    gettext("invalid option '%s'\n"),
				    argv[optind - 1]);
			}
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name argument\n"));
		usage(B_FALSE);
		return (-1);
	}

	/* -w is only accepted when starting a trim. */
	if (wait && (cmd_type != POOL_TRIM_START)) {
		(void) fprintf(stderr, gettext("-w cannot be used with -c or "
		    "-s\n"));
		usage(B_FALSE);
	}

	char *poolname = argv[0];
	zpool_handle_t *zhp = zpool_open(g_zfs, poolname);
	if (zhp == NULL)
		return (-1);

	trimflags_t trim_flags = {
		.secure = secure,
		.rate = rate,
		.wait = wait,
	};

	/* The set of vdevs to act on, keyed by name. */
	nvlist_t *vdevs = fnvlist_alloc();
	if (argc == 1) {
		/* no individual leaf vdevs specified, so add them all */
		nvlist_t *config = zpool_get_config(zhp, NULL);
		nvlist_t *nvroot = fnvlist_lookup_nvlist(config,
		    ZPOOL_CONFIG_VDEV_TREE);
		zpool_collect_leaves(zhp, nvroot, vdevs);
		trim_flags.fullpool = B_TRUE;
	} else {
		trim_flags.fullpool = B_FALSE;
		for (int i = 1; i < argc; i++) {
			fnvlist_add_boolean(vdevs, argv[i]);
		}
	}

	int error = zpool_trim(zhp, cmd_type, vdevs, &trim_flags);

	fnvlist_free(vdevs);
	zpool_close(zhp);

	return (error);
}

/*
 * Converts a total number of seconds to a human readable string broken
 * down in to days/hours/minutes/seconds.
 *
 * The result is written with sprintf(), so 'buf' must be large enough for
 * the formatted text; the callers in this file pass a 32-byte buffer.
 */
static void
secs_to_dhms(uint64_t total, char *buf)
{
	uint64_t days = total / 60 / 60 / 24;
	uint64_t hours = (total / 60 / 60) % 24;
	uint64_t mins = (total / 60) % 60;
	uint64_t secs = (total % 60);

	/* The "N days" prefix is only emitted when at least one day is left. */
	if (days > 0) {
		(void) sprintf(buf, "%llu days %02llu:%02llu:%02llu",
		    (u_longlong_t)days, (u_longlong_t)hours,
		    (u_longlong_t)mins, (u_longlong_t)secs);
	} else {
		(void) sprintf(buf, "%02llu:%02llu:%02llu",
		    (u_longlong_t)hours, (u_longlong_t)mins,
		    (u_longlong_t)secs);
	}
}

/*
 * Print out detailed scrub status.
 *
 * Handles both scrubs and resilvers described by 'ps': prints a one-line
 * summary for a finished or canceled scan, or progress, rates and (when
 * meaningful) a time estimate for a scan that is still running.
 */
static void
print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
{
	time_t start, end, pause;
	uint64_t pass_scanned, scanned, pass_issued, issued, total;
	uint64_t elapsed, scan_rate, issue_rate;
	double fraction_done;
	char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7];
	char srate_buf[7], irate_buf[7], time_buf[32];

	printf(" ");
	printf_color(ANSI_BOLD, gettext("scan:"));
	printf(" ");

	/* If there's never been a scan, there's not much to say. */
	if (ps == NULL || ps->pss_func == POOL_SCAN_NONE ||
	    ps->pss_func >= POOL_SCAN_FUNCS) {
		(void) printf(gettext("none requested\n"));
		return;
	}

	start = ps->pss_start_time;
	end = ps->pss_end_time;
	pause = ps->pss_pass_scrub_pause;

	zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));

	int is_resilver = ps->pss_func == POOL_SCAN_RESILVER;
	int is_scrub = ps->pss_func == POOL_SCAN_SCRUB;
	assert(is_resilver || is_scrub);

	/* Scan is finished or canceled. */
	if (ps->pss_state == DSS_FINISHED) {
		secs_to_dhms(end - start, time_buf);

		/* ctime() supplies the trailing newline for these lines. */
		if (is_scrub) {
			(void) printf(gettext("scrub repaired %s "
			    "in %s with %llu errors on %s"), processed_buf,
			    time_buf, (u_longlong_t)ps->pss_errors,
			    ctime(&end));
		} else if (is_resilver) {
			(void) printf(gettext("resilvered %s "
			    "in %s with %llu errors on %s"), processed_buf,
			    time_buf, (u_longlong_t)ps->pss_errors,
			    ctime(&end));
		}
		return;
	} else if (ps->pss_state == DSS_CANCELED) {
		if (is_scrub) {
			(void) printf(gettext("scrub canceled on %s"),
			    ctime(&end));
		} else if (is_resilver) {
			(void) printf(gettext("resilver canceled on %s"),
			    ctime(&end));
		}
		return;
	}

	assert(ps->pss_state == DSS_SCANNING);

	/* Scan is in progress. Resilvers can't be paused. */
	if (is_scrub) {
		if (pause == 0) {
			(void) printf(gettext("scrub in progress since %s"),
			    ctime(&start));
		} else {
			(void) printf(gettext("scrub paused since %s"),
			    ctime(&pause));
			(void) printf(gettext("\tscrub started on %s"),
			    ctime(&start));
		}
	} else if (is_resilver) {
		(void) printf(gettext("resilver in progress since %s"),
		    ctime(&start));
	}

	scanned = ps->pss_examined;
	pass_scanned = ps->pss_pass_exam;
	issued = ps->pss_issued;
	pass_issued = ps->pss_pass_issued;
	total = ps->pss_to_examine;

	/* we are only done with a block once we have issued the IO for it */
	fraction_done = (double)issued / total;

	/* elapsed time for this pass, rounding up to 1 if it's 0 */
	elapsed = time(NULL) - ps->pss_pass_start;
	elapsed -= ps->pss_pass_scrub_spent_paused;
	elapsed = (elapsed != 0) ? elapsed : 1;

	scan_rate = pass_scanned / elapsed;
	issue_rate = pass_issued / elapsed;

	/* UINT64_MAX marks "no valid estimate" and is tested for below. */
	uint64_t total_secs_left = (issue_rate != 0 && total >= issued) ?
	    ((total - issued) / issue_rate) : UINT64_MAX;
	secs_to_dhms(total_secs_left, time_buf);

	/* format all of the numbers we will be reporting */
	zfs_nicebytes(scanned, scanned_buf, sizeof (scanned_buf));
	zfs_nicebytes(issued, issued_buf, sizeof (issued_buf));
	zfs_nicebytes(total, total_buf, sizeof (total_buf));
	zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
	zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));

	/* do not print estimated time if we have a paused scrub */
	if (pause == 0) {
		(void) printf(gettext("\t%s scanned at %s/s, "
		    "%s issued at %s/s, %s total\n"),
		    scanned_buf, srate_buf, issued_buf, irate_buf, total_buf);
	} else {
		(void) printf(gettext("\t%s scanned, %s issued, %s total\n"),
		    scanned_buf, issued_buf, total_buf);
	}

	if (is_resilver) {
		(void) printf(gettext("\t%s resilvered, %.2f%% done"),
		    processed_buf, 100 * fraction_done);
	} else if (is_scrub) {
		(void) printf(gettext("\t%s repaired, %.2f%% done"),
		    processed_buf, 100 * fraction_done);
	}

	if (pause == 0) {
		/*
		 * Only provide an estimate iff:
		 * 1) the time remaining is valid, and
		 * 2) the issue rate exceeds 10 MB/s, and
		 * 3) it's either:
		 * a) a resilver which has started repairs, or
		 * b) a scrub which has entered the issue phase.
		 */
		if (total_secs_left != UINT64_MAX &&
		    issue_rate >= 10 * 1024 * 1024 &&
		    ((is_resilver && ps->pss_processed > 0) ||
		    (is_scrub && issued > 0))) {
			(void) printf(gettext(", %s to go\n"), time_buf);
		} else {
			(void) printf(gettext(", no estimated "
			    "completion time\n"));
		}
	} else {
		(void) printf(gettext("\n"));
	}
}

/*
 * Print the "scan:" status for one top-level vdev's rebuild, described by
 * 'vrs' and labelled with 'vdev_name'.  Prints nothing when there are no
 * stats or the state is VDEV_REBUILD_NONE.
 */
static void
print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
{
	if (vrs == NULL || vrs->vrs_state == VDEV_REBUILD_NONE)
		return;

	printf(" ");
	printf_color(ANSI_BOLD, gettext("scan:"));
	printf(" ");

	uint64_t bytes_scanned = vrs->vrs_bytes_scanned;
	uint64_t bytes_issued = vrs->vrs_bytes_issued;
	uint64_t bytes_rebuilt = vrs->vrs_bytes_rebuilt;
	uint64_t bytes_est = vrs->vrs_bytes_est;
	/* The "+ 1" in the divisors below avoids dividing by zero. */
	uint64_t scan_rate = (vrs->vrs_pass_bytes_scanned /
	    (vrs->vrs_pass_time_ms + 1)) * 1000;
	uint64_t issue_rate = (vrs->vrs_pass_bytes_issued /
	    (vrs->vrs_pass_time_ms + 1)) * 1000;
	double scan_pct = MIN((double)bytes_scanned * 100 /
	    (bytes_est + 1), 100);

	/* Format all of the numbers we will be reporting */
	char bytes_scanned_buf[7], bytes_issued_buf[7];
	char bytes_rebuilt_buf[7], bytes_est_buf[7];
	char scan_rate_buf[7], issue_rate_buf[7], time_buf[32];
	zfs_nicebytes(bytes_scanned, bytes_scanned_buf,
	    sizeof (bytes_scanned_buf));
	zfs_nicebytes(bytes_issued, bytes_issued_buf,
	    sizeof (bytes_issued_buf));
	zfs_nicebytes(bytes_rebuilt, bytes_rebuilt_buf,
	    sizeof (bytes_rebuilt_buf));
	zfs_nicebytes(bytes_est, bytes_est_buf, sizeof (bytes_est_buf));
	zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
	zfs_nicebytes(issue_rate, issue_rate_buf, sizeof (issue_rate_buf));

	time_t start = vrs->vrs_start_time;
	time_t end = vrs->vrs_end_time;

	/* Rebuild is finished or canceled. */
	if (vrs->vrs_state == VDEV_REBUILD_COMPLETE) {
		secs_to_dhms(vrs->vrs_scan_time_ms / 1000, time_buf);
		(void) printf(gettext("resilvered (%s) %s in %s "
		    "with %llu errors on %s"), vdev_name, bytes_rebuilt_buf,
		    time_buf, (u_longlong_t)vrs->vrs_errors, ctime(&end));
		return;
	} else if (vrs->vrs_state == VDEV_REBUILD_CANCELED) {
		(void) printf(gettext("resilver (%s) canceled on %s"),
		    vdev_name, ctime(&end));
		return;
	} else if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
		(void) printf(gettext("resilver (%s) in progress since %s"),
		    vdev_name, ctime(&start));
	}

	assert(vrs->vrs_state == VDEV_REBUILD_ACTIVE);

	/* Remaining time: bytes left (never negative) over the scan rate. */
	secs_to_dhms(MAX((int64_t)bytes_est - (int64_t)bytes_scanned, 0) /
	    MAX(scan_rate, 1), time_buf);

	(void) printf(gettext("\t%s scanned at %s/s, %s issued %s/s, "
	    "%s total\n"), bytes_scanned_buf, scan_rate_buf,
	    bytes_issued_buf, issue_rate_buf, bytes_est_buf);
	(void) printf(gettext("\t%s resilvered, %.2f%% done"),
	    bytes_rebuilt_buf, scan_pct);

	if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
		/* As for scrubs, only estimate above 10 MB/s. */
		if (scan_rate >= 10 * 1024 * 1024) {
			(void) printf(gettext(", %s to go\n"), time_buf);
		} else {
			(void) printf(gettext(", no estimated "
			    "completion time\n"));
		}
	} else {
		(void) printf(gettext("\n"));
	}
}

/*
 * Print rebuild status for top-level vdevs.
 */
static void
print_rebuild_status(zpool_handle_t *zhp, nvlist_t *nvroot)
{
	nvlist_t **child;
	uint_t children;

	/* No children array means there is nothing to iterate over. */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		children = 0;

	for (uint_t c = 0; c < children; c++) {
		vdev_rebuild_stat_t *vrs;
		uint_t i;

		/* Children without rebuild stats are skipped. */
		if (nvlist_lookup_uint64_array(child[c],
		    ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i) == 0) {
			char *name = zpool_vdev_name(g_zfs, zhp,
			    child[c], VDEV_NAME_TYPE_ID);
			print_rebuild_status_impl(vrs, name);
			free(name);
		}
	}
}

/*
 * As we don't scrub checkpointed blocks, we want to warn the user that we
 * skipped scanning some blocks if a checkpoint exists or existed at any
 * time during the scan.
If a sequential instead of healing reconstruction * was performed then the blocks were reconstructed. However, their checksums * have not been verified so we still print the warning. */ static void print_checkpoint_scan_warning(pool_scan_stat_t *ps, pool_checkpoint_stat_t *pcs) { if (ps == NULL || pcs == NULL) return; if (pcs->pcs_state == CS_NONE || pcs->pcs_state == CS_CHECKPOINT_DISCARDING) return; assert(pcs->pcs_state == CS_CHECKPOINT_EXISTS); if (ps->pss_state == DSS_NONE) return; if ((ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED) && ps->pss_end_time < pcs->pcs_start_time) return; if (ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED) { (void) printf(gettext(" scan warning: skipped blocks " "that are only referenced by the checkpoint.\n")); } else { assert(ps->pss_state == DSS_SCANNING); (void) printf(gettext(" scan warning: skipping blocks " "that are only referenced by the checkpoint.\n")); } } /* * Returns B_TRUE if there is an active rebuild in progress. Otherwise, * B_FALSE is returned and 'rebuild_end_time' is set to the end time for * the last completed (or cancelled) rebuild. */ static boolean_t check_rebuilding(nvlist_t *nvroot, uint64_t *rebuild_end_time) { nvlist_t **child; uint_t children; boolean_t rebuilding = B_FALSE; uint64_t end_time = 0; if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) children = 0; for (uint_t c = 0; c < children; c++) { vdev_rebuild_stat_t *vrs; uint_t i; if (nvlist_lookup_uint64_array(child[c], ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i) == 0) { if (vrs->vrs_end_time > end_time) end_time = vrs->vrs_end_time; if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) { rebuilding = B_TRUE; end_time = 0; break; } } } if (rebuild_end_time != NULL) *rebuild_end_time = end_time; return (rebuilding); } /* * Print the scan status. 
*/ static void print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot) { uint64_t rebuild_end_time = 0, resilver_end_time = 0; boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE; boolean_t active_resilver = B_FALSE; pool_checkpoint_stat_t *pcs = NULL; pool_scan_stat_t *ps = NULL; uint_t c; if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c) == 0) { if (ps->pss_func == POOL_SCAN_RESILVER) { resilver_end_time = ps->pss_end_time; active_resilver = (ps->pss_state == DSS_SCANNING); } have_resilver = (ps->pss_func == POOL_SCAN_RESILVER); have_scrub = (ps->pss_func == POOL_SCAN_SCRUB); } boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time); boolean_t have_rebuild = (active_rebuild || (rebuild_end_time > 0)); /* Always print the scrub status when available. */ if (have_scrub) print_scan_scrub_resilver_status(ps); /* * When there is an active resilver or rebuild print its status. * Otherwise print the status of the last resilver or rebuild. */ if (active_resilver || (!active_rebuild && have_resilver && resilver_end_time && resilver_end_time > rebuild_end_time)) { print_scan_scrub_resilver_status(ps); } else if (active_rebuild || (!active_resilver && have_rebuild && rebuild_end_time && rebuild_end_time > resilver_end_time)) { print_rebuild_status(zhp, nvroot); } (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); print_checkpoint_scan_warning(ps, pcs); } /* * Print out detailed removal status. */ static void print_removal_status(zpool_handle_t *zhp, pool_removal_stat_t *prs) { char copied_buf[7], examined_buf[7], total_buf[7], rate_buf[7]; time_t start, end; nvlist_t *config, *nvroot; nvlist_t **child; uint_t children; char *vdev_name; if (prs == NULL || prs->prs_state == DSS_NONE) return; /* * Determine name of vdev. 
*/ config = zpool_get_config(zhp, NULL); nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); assert(prs->prs_removing_vdev < children); vdev_name = zpool_vdev_name(g_zfs, zhp, child[prs->prs_removing_vdev], B_TRUE); printf_color(ANSI_BOLD, gettext("remove: ")); start = prs->prs_start_time; end = prs->prs_end_time; zfs_nicenum(prs->prs_copied, copied_buf, sizeof (copied_buf)); /* * Removal is finished or canceled. */ if (prs->prs_state == DSS_FINISHED) { uint64_t minutes_taken = (end - start) / 60; (void) printf(gettext("Removal of vdev %llu copied %s " "in %lluh%um, completed on %s"), (longlong_t)prs->prs_removing_vdev, copied_buf, (u_longlong_t)(minutes_taken / 60), (uint_t)(minutes_taken % 60), ctime((time_t *)&end)); } else if (prs->prs_state == DSS_CANCELED) { (void) printf(gettext("Removal of %s canceled on %s"), vdev_name, ctime(&end)); } else { uint64_t copied, total, elapsed, mins_left, hours_left; double fraction_done; uint_t rate; assert(prs->prs_state == DSS_SCANNING); /* * Removal is in progress. */ (void) printf(gettext( "Evacuation of %s in progress since %s"), vdev_name, ctime(&start)); copied = prs->prs_copied > 0 ? prs->prs_copied : 1; total = prs->prs_to_copy; fraction_done = (double)copied / total; /* elapsed time for this pass */ elapsed = time(NULL) - prs->prs_start_time; elapsed = elapsed > 0 ? elapsed : 1; rate = copied / elapsed; rate = rate > 0 ? 
rate : 1; mins_left = ((total - copied) / rate) / 60; hours_left = mins_left / 60; zfs_nicenum(copied, examined_buf, sizeof (examined_buf)); zfs_nicenum(total, total_buf, sizeof (total_buf)); zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); /* * do not print estimated time if hours_left is more than * 30 days */ (void) printf(gettext( "\t%s copied out of %s at %s/s, %.2f%% done"), examined_buf, total_buf, rate_buf, 100 * fraction_done); if (hours_left < (30 * 24)) { (void) printf(gettext(", %lluh%um to go\n"), (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); } else { (void) printf(gettext( ", (copy is slow, no estimated time)\n")); } } free(vdev_name); if (prs->prs_mapping_memory > 0) { char mem_buf[7]; zfs_nicenum(prs->prs_mapping_memory, mem_buf, sizeof (mem_buf)); (void) printf(gettext( "\t%s memory used for removed device mappings\n"), mem_buf); } } static void print_checkpoint_status(pool_checkpoint_stat_t *pcs) { time_t start; char space_buf[7]; if (pcs == NULL || pcs->pcs_state == CS_NONE) return; (void) printf(gettext("checkpoint: ")); start = pcs->pcs_start_time; zfs_nicenum(pcs->pcs_space, space_buf, sizeof (space_buf)); if (pcs->pcs_state == CS_CHECKPOINT_EXISTS) { char *date = ctime(&start); /* * ctime() adds a newline at the end of the generated * string, thus the weird format specifier and the * strlen() call used to chop it off from the output. 
*/ (void) printf(gettext("created %.*s, consumes %s\n"), (int)(strlen(date) - 1), date, space_buf); return; } assert(pcs->pcs_state == CS_CHECKPOINT_DISCARDING); (void) printf(gettext("discarding, %s remaining.\n"), space_buf); } static void print_error_log(zpool_handle_t *zhp) { nvlist_t *nverrlist = NULL; nvpair_t *elem; char *pathname; size_t len = MAXPATHLEN * 2; if (zpool_get_errlog(zhp, &nverrlist) != 0) return; (void) printf("errors: Permanent errors have been " "detected in the following files:\n\n"); pathname = safe_malloc(len); elem = NULL; while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) { nvlist_t *nv; uint64_t dsobj, obj; verify(nvpair_value_nvlist(elem, &nv) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET, &dsobj) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT, &obj) == 0); zpool_obj_to_path(zhp, dsobj, obj, pathname, len); (void) printf("%7s %s\n", "", pathname); } free(pathname); nvlist_free(nverrlist); } static void print_spares(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **spares, uint_t nspares) { uint_t i; char *name; if (nspares == 0) return; (void) printf(gettext("\tspares\n")); for (i = 0; i < nspares; i++) { name = zpool_vdev_name(g_zfs, zhp, spares[i], cb->cb_name_flags); print_status_config(zhp, cb, name, spares[i], 2, B_TRUE, NULL); free(name); } } static void print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache, uint_t nl2cache) { uint_t i; char *name; if (nl2cache == 0) return; (void) printf(gettext("\tcache\n")); for (i = 0; i < nl2cache; i++) { name = zpool_vdev_name(g_zfs, zhp, l2cache[i], cb->cb_name_flags); print_status_config(zhp, cb, name, l2cache[i], 2, B_FALSE, NULL); free(name); } } static void print_dedup_stats(nvlist_t *config) { ddt_histogram_t *ddh; ddt_stat_t *dds; ddt_object_t *ddo; uint_t c; char dspace[6], mspace[6]; /* * If the pool was faulted then we may not have been able to * obtain the config. 
Otherwise, if we have anything in the dedup * table continue processing the stats. */ if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, (uint64_t **)&ddo, &c) != 0) return; (void) printf("\n"); (void) printf(gettext(" dedup: ")); if (ddo->ddo_count == 0) { (void) printf(gettext("no DDT entries\n")); return; } zfs_nicebytes(ddo->ddo_dspace, dspace, sizeof (dspace)); zfs_nicebytes(ddo->ddo_mspace, mspace, sizeof (mspace)); (void) printf("DDT entries %llu, size %s on disk, %s in core\n", (u_longlong_t)ddo->ddo_count, dspace, mspace); verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, (uint64_t **)&dds, &c) == 0); verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, (uint64_t **)&ddh, &c) == 0); zpool_dump_ddt(dds, ddh); } /* * Display a summary of pool status. Displays a summary such as: * * pool: tank * status: DEGRADED * reason: One or more devices ... * see: https://openzfs.github.io/openzfs-docs/msg/ZFS-xxxx-01 * config: * mirror DEGRADED * c1t0d0 OK * c2t0d0 UNAVAIL * * When given the '-v' option, we print out the complete config. If the '-e' * option is specified, then we print out error rate information as well. */ static int status_callback(zpool_handle_t *zhp, void *data) { status_cbdata_t *cbp = data; nvlist_t *config, *nvroot; char *msgid; zpool_status_t reason; zpool_errata_t errata; const char *health; uint_t c; vdev_stat_t *vs; config = zpool_get_config(zhp, NULL); reason = zpool_get_status(zhp, &msgid, &errata); cbp->cb_count++; /* * If we were given 'zpool status -x', only report those pools with * problems. 
*/ if (cbp->cb_explain && (reason == ZPOOL_STATUS_OK || reason == ZPOOL_STATUS_VERSION_OLDER || reason == ZPOOL_STATUS_FEAT_DISABLED || reason == ZPOOL_STATUS_COMPATIBILITY_ERR || reason == ZPOOL_STATUS_INCOMPATIBLE_FEAT)) { if (!cbp->cb_allpools) { (void) printf(gettext("pool '%s' is healthy\n"), zpool_get_name(zhp)); if (cbp->cb_first) cbp->cb_first = B_FALSE; } return (0); } if (cbp->cb_first) cbp->cb_first = B_FALSE; else (void) printf("\n"); nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); health = zpool_get_state_str(zhp); printf(" "); printf_color(ANSI_BOLD, gettext("pool:")); printf(" %s\n", zpool_get_name(zhp)); printf(" "); printf_color(ANSI_BOLD, gettext("state: ")); printf_color(health_str_to_color(health), "%s", health); printf("\n"); switch (reason) { case ZPOOL_STATUS_MISSING_DEV_R: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices could " "not be opened. Sufficient replicas exist for\n\tthe pool " "to continue functioning in a degraded state.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Attach the missing device " "and online it using 'zpool online'.\n")); break; case ZPOOL_STATUS_MISSING_DEV_NR: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices could " "not be opened. There are insufficient\n\treplicas for the" " pool to continue functioning.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Attach the missing device " "and online it using 'zpool online'.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_R: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices could " "not be used because the label is missing or\n\tinvalid. 
" "Sufficient replicas exist for the pool to continue\n\t" "functioning in a degraded state.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Replace the device using " "'zpool replace'.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_NR: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices could " "not be used because the label is missing \n\tor invalid. " "There are insufficient replicas for the pool to " "continue\n\tfunctioning.\n")); zpool_explain_recover(zpool_get_handle(zhp), zpool_get_name(zhp), reason, config); break; case ZPOOL_STATUS_FAILING_DEV: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices has " "experienced an unrecoverable error. An\n\tattempt was " "made to correct the error. Applications are " "unaffected.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Determine if the " "device needs to be replaced, and clear the errors\n\tusing" " 'zpool clear' or replace the device with 'zpool " "replace'.\n")); break; case ZPOOL_STATUS_OFFLINE_DEV: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices has " "been taken offline by the administrator.\n\tSufficient " "replicas exist for the pool to continue functioning in " "a\n\tdegraded state.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Online the device " "using 'zpool online' or replace the device with\n\t'zpool " "replace'.\n")); break; case ZPOOL_STATUS_REMOVED_DEV: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices has " "been removed by the administrator.\n\tSufficient " "replicas exist for the pool to continue functioning in " "a\n\tdegraded state.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Online the device " "using zpool online' or replace the 
device with\n\t'zpool " "replace'.\n")); break; case ZPOOL_STATUS_RESILVERING: case ZPOOL_STATUS_REBUILDING: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices is " "currently being resilvered. The pool will\n\tcontinue " "to function, possibly in a degraded state.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Wait for the resilver to " "complete.\n")); break; case ZPOOL_STATUS_REBUILD_SCRUB: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices have " "been sequentially resilvered, scrubbing\n\tthe pool " "is recommended.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Use 'zpool scrub' to " "verify all data checksums.\n")); break; case ZPOOL_STATUS_CORRUPT_DATA: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices has " "experienced an error resulting in data\n\tcorruption. " "Applications may be affected.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Restore the file in question" " if possible. Otherwise restore the\n\tentire pool from " "backup.\n")); break; case ZPOOL_STATUS_CORRUPT_POOL: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool metadata is " "corrupted and the pool cannot be opened.\n")); zpool_explain_recover(zpool_get_handle(zhp), zpool_get_name(zhp), reason, config); break; case ZPOOL_STATUS_VERSION_OLDER: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool is formatted using " "a legacy on-disk format. The pool can\n\tstill be used, " "but some features are unavailable.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Upgrade the pool using " "'zpool upgrade'. 
Once this is done, the\n\tpool will no " "longer be accessible on software that does not support\n\t" "feature flags.\n")); break; case ZPOOL_STATUS_VERSION_NEWER: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool has been upgraded " "to a newer, incompatible on-disk version.\n\tThe pool " "cannot be accessed on this system.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Access the pool from a " "system running more recent software, or\n\trestore the " "pool from backup.\n")); break; case ZPOOL_STATUS_FEAT_DISABLED: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("Some supported and " "requested features are not enabled on the pool.\n\t" "The pool can still be used, but some features are " "unavailable.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Enable all features using " "'zpool upgrade'. Once this is done,\n\tthe pool may no " "longer be accessible by software that does not support\n\t" "the features. See zpool-features(7) for details.\n")); break; case ZPOOL_STATUS_COMPATIBILITY_ERR: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("This pool has a " "compatibility list specified, but it could not be\n\t" "read/parsed at this time. 
The pool can still be used, " "but this\n\tshould be investigated.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Check the value of the " "'compatibility' property against the\n\t" "appropriate file in " ZPOOL_SYSCONF_COMPAT_D " or " ZPOOL_DATA_COMPAT_D ".\n")); break; case ZPOOL_STATUS_INCOMPATIBLE_FEAT: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more features " "are enabled on the pool despite not being\n\t" "requested by the 'compatibility' property.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Consider setting " "'compatibility' to an appropriate value, or\n\t" "adding needed features to the relevant file in\n\t" ZPOOL_SYSCONF_COMPAT_D " or " ZPOOL_DATA_COMPAT_D ".\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_READ: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool cannot be accessed " "on this system because it uses the\n\tfollowing feature(s)" " not supported on this system:\n")); zpool_print_unsup_feat(config); (void) printf("\n"); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Access the pool from a " "system that supports the required feature(s),\n\tor " "restore the pool from backup.\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_WRITE: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool can only be " "accessed in read-only mode on this system. It\n\tcannot be" " accessed in read-write mode because it uses the " "following\n\tfeature(s) not supported on this system:\n")); zpool_print_unsup_feat(config); (void) printf("\n"); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("The pool cannot be accessed " "in read-write mode. 
Import the pool with\n" "\t\"-o readonly=on\", access the pool from a system that " "supports the\n\trequired feature(s), or restore the " "pool from backup.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_R: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices are " "faulted in response to persistent errors.\n\tSufficient " "replicas exist for the pool to continue functioning " "in a\n\tdegraded state.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Replace the faulted device, " "or use 'zpool clear' to mark the device\n\trepaired.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_NR: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices are " "faulted in response to persistent errors. There are " "insufficient replicas for the pool to\n\tcontinue " "functioning.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Destroy and re-create the " "pool from a backup source. 
Manually marking the device\n" "\trepaired using 'zpool clear' may allow some data " "to be recovered.\n")); break; case ZPOOL_STATUS_IO_FAILURE_MMP: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool is suspended " "because multihost writes failed or were delayed;\n\t" "another system could import the pool undetected.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Make sure the pool's devices" " are connected, then reboot your system and\n\timport the " "pool.\n")); break; case ZPOOL_STATUS_IO_FAILURE_WAIT: case ZPOOL_STATUS_IO_FAILURE_CONTINUE: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices are " "faulted in response to IO failures.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Make sure the affected " "devices are connected, then run 'zpool clear'.\n")); break; case ZPOOL_STATUS_BAD_LOG: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("An intent log record " "could not be read.\n" "\tWaiting for administrator intervention to fix the " "faulted pool.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Either restore the affected " "device(s) and run 'zpool online',\n" "\tor ignore the intent log records by running " "'zpool clear'.\n")); break; case ZPOOL_STATUS_NON_NATIVE_ASHIFT: (void) printf(gettext("status: One or more devices are " "configured to use a non-native block size.\n" "\tExpect reduced performance.\n")); (void) printf(gettext("action: Replace affected devices with " "devices that support the\n\tconfigured block size, or " "migrate data to a properly configured\n\tpool.\n")); break; case ZPOOL_STATUS_HOSTID_MISMATCH: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("Mismatch between pool hostid" " and system hostid on imported pool.\n\tThis pool was " "previously imported into a 
system with a different " "hostid,\n\tand then was verbatim imported into this " "system.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("Export this pool on all " "systems on which it is imported.\n" "\tThen import it to correct the mismatch.\n")); break; case ZPOOL_STATUS_ERRATA: printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("Errata #%d detected.\n"), errata); switch (errata) { case ZPOOL_ERRATA_NONE: break; case ZPOOL_ERRATA_ZOL_2094_SCRUB: printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("To correct the issue" " run 'zpool scrub'.\n")); break; case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: (void) printf(gettext("\tExisting encrypted datasets " "contain an on-disk incompatibility\n\twhich " "needs to be corrected.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("To correct the issue" " backup existing encrypted datasets to new\n\t" "encrypted datasets and destroy the old ones. " "'zfs mount -o ro' can\n\tbe used to temporarily " "mount existing encrypted datasets readonly.\n")); break; case ZPOOL_ERRATA_ZOL_8308_ENCRYPTION: (void) printf(gettext("\tExisting encrypted snapshots " "and bookmarks contain an on-disk\n\tincompat" "ibility. This may cause on-disk corruption if " "they are used\n\twith 'zfs recv'.\n")); printf_color(ANSI_BOLD, gettext("action: ")); printf_color(ANSI_YELLOW, gettext("To correct the" "issue, enable the bookmark_v2 feature. No " "additional\n\taction is needed if there are no " "encrypted snapshots or bookmarks.\n\tIf preserving" "the encrypted snapshots and bookmarks is required," " use\n\ta non-raw send to backup and restore them." " Alternately, they may be\n\tremoved to resolve " "the incompatibility.\n")); break; default: /* * All errata which allow the pool to be imported * must contain an action message. */ assert(0); } break; default: /* * The remaining errors can't actually be generated, yet. 
*/ assert(reason == ZPOOL_STATUS_OK); } if (msgid != NULL) { printf(" "); printf_color(ANSI_BOLD, gettext("see:")); printf(gettext( " https://openzfs.github.io/openzfs-docs/msg/%s\n"), msgid); } if (config != NULL) { uint64_t nerr; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; pool_checkpoint_stat_t *pcs = NULL; pool_removal_stat_t *prs = NULL; print_scan_status(zhp, nvroot); (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c); print_removal_status(zhp, prs); (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); print_checkpoint_status(pcs); cbp->cb_namewidth = max_width(zhp, nvroot, 0, 0, cbp->cb_name_flags | VDEV_NAME_TYPE_ID); if (cbp->cb_namewidth < 10) cbp->cb_namewidth = 10; color_start(ANSI_BOLD); (void) printf(gettext("config:\n\n")); (void) printf(gettext("\t%-*s %-8s %5s %5s %5s"), cbp->cb_namewidth, "NAME", "STATE", "READ", "WRITE", "CKSUM"); color_end(); if (cbp->cb_print_slow_ios) { printf_color(ANSI_BOLD, " %5s", gettext("SLOW")); } + if (cbp->cb_print_power) { + printf_color(ANSI_BOLD, " %5s", gettext("POWER")); + } + if (cbp->vcdl != NULL) print_cmd_columns(cbp->vcdl, 0); printf("\n"); print_status_config(zhp, cbp, zpool_get_name(zhp), nvroot, 0, B_FALSE, NULL); print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_DEDUP); print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_SPECIAL); print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_CLASS_LOGS); if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) print_l2cache(zhp, cbp, l2cache, nl2cache); if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) print_spares(zhp, cbp, spares, nspares); if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, &nerr) == 0) { nvlist_t *nverrlist = NULL; /* * If the approximate error count is small, get a * precise count by fetching the entire log and * uniquifying the results. 
*/ if (nerr > 0 && nerr < 100 && !cbp->cb_verbose && zpool_get_errlog(zhp, &nverrlist) == 0) { nvpair_t *elem; elem = NULL; nerr = 0; while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) { nerr++; } } nvlist_free(nverrlist); (void) printf("\n"); if (nerr == 0) (void) printf(gettext("errors: No known data " "errors\n")); else if (!cbp->cb_verbose) (void) printf(gettext("errors: %llu data " "errors, use '-v' for a list\n"), (u_longlong_t)nerr); else print_error_log(zhp); } if (cbp->cb_dedup_stats) print_dedup_stats(config); } else { (void) printf(gettext("config: The configuration cannot be " "determined.\n")); } return (0); } /* - * zpool status [-c [script1,script2,...]] [-igLpPstvx] [-T d|u] [pool] ... - * [interval [count]] + * zpool status [-c [script1,script2,...]] [-igLpPstvx] [--power] [-T d|u] ... + * [pool] [interval [count]] * * -c CMD For each vdev, run command CMD * -i Display vdev initialization status. * -g Display guid for individual vdev name. * -L Follow links when resolving vdev path name. * -p Display values in parsable (exact) format. * -P Display full path for vdev name. * -s Display slow IOs column. * -v Display complete error logs * -x Display only pools with potential problems * -D Display dedup status (undocumented) * -t Display vdev TRIM status. * -T Display a timestamp in date(1) or Unix format + * --power Display vdev enclosure slot power status * * Describes the health status of all pools or some subset. 
*/ int zpool_do_status(int argc, char **argv) { int c; int ret; float interval = 0; unsigned long count = 0; status_cbdata_t cb = { 0 }; char *cmd = NULL; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "c:igLpPsvxDtT:")) != -1) { + while ((c = getopt_long(argc, argv, "c:igLpPsvxDtT:", long_options, + NULL)) != -1) { switch (c) { case 'c': if (cmd != NULL) { fprintf(stderr, gettext("Can't set -c flag twice\n")); exit(1); } if (getenv("ZPOOL_SCRIPTS_ENABLED") != NULL && !libzfs_envvar_is_set("ZPOOL_SCRIPTS_ENABLED")) { fprintf(stderr, gettext( "Can't run -c, disabled by " "ZPOOL_SCRIPTS_ENABLED.\n")); exit(1); } if ((getuid() <= 0 || geteuid() <= 0) && !libzfs_envvar_is_set("ZPOOL_SCRIPTS_AS_ROOT")) { fprintf(stderr, gettext( "Can't run -c with root privileges " "unless ZPOOL_SCRIPTS_AS_ROOT is set.\n")); exit(1); } cmd = optarg; break; case 'i': cb.cb_print_vdev_init = B_TRUE; break; case 'g': cb.cb_name_flags |= VDEV_NAME_GUID; break; case 'L': cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; break; case 'p': cb.cb_literal = B_TRUE; break; case 'P': cb.cb_name_flags |= VDEV_NAME_PATH; break; case 's': cb.cb_print_slow_ios = B_TRUE; break; case 'v': cb.cb_verbose = B_TRUE; break; case 'x': cb.cb_explain = B_TRUE; break; case 'D': cb.cb_dedup_stats = B_TRUE; break; case 't': cb.cb_print_vdev_trim = B_TRUE; break; case 'T': get_timestamp_arg(*optarg); break; + case POWER_OPT: + cb.cb_print_power = B_TRUE; + break; case '?': if (optopt == 'c') { print_zpool_script_list("status"); exit(0); } else { fprintf(stderr, gettext("invalid option '%c'\n"), optopt); } usage(B_FALSE); } } argc -= optind; argv += optind; get_interval_count(&argc, argv, &interval, &count); if (argc == 0) cb.cb_allpools = B_TRUE; cb.cb_first = B_TRUE; cb.cb_print_status = B_TRUE; for (;;) { if (timestamp_fmt != NODATE) print_timestamp(timestamp_fmt); if (cmd != NULL) cb.vcdl = 
all_pools_for_each_vdev_run(argc, argv, cmd, NULL, NULL, 0, 0); ret = for_each_pool(argc, argv, B_TRUE, NULL, cb.cb_literal, status_callback, &cb); if (cb.vcdl != NULL) free_vdev_cmd_data_list(cb.vcdl); if (argc == 0 && cb.cb_count == 0) (void) fprintf(stderr, gettext("no pools available\n")); else if (cb.cb_explain && cb.cb_first && cb.cb_allpools) (void) printf(gettext("all pools are healthy\n")); if (ret != 0) return (ret); if (interval == 0) break; if (count != 0 && --count == 0) break; (void) fsleep(interval); } return (0); } typedef struct upgrade_cbdata { int cb_first; int cb_argc; uint64_t cb_version; char **cb_argv; } upgrade_cbdata_t; static int check_unsupp_fs(zfs_handle_t *zhp, void *unsupp_fs) { int zfs_version = (int)zfs_prop_get_int(zhp, ZFS_PROP_VERSION); int *count = (int *)unsupp_fs; if (zfs_version > ZPL_VERSION) { (void) printf(gettext("%s (v%d) is not supported by this " "implementation of ZFS.\n"), zfs_get_name(zhp), zfs_version); (*count)++; } zfs_iter_filesystems(zhp, check_unsupp_fs, unsupp_fs); zfs_close(zhp); return (0); } static int upgrade_version(zpool_handle_t *zhp, uint64_t version) { int ret; nvlist_t *config; uint64_t oldversion; int unsupp_fs = 0; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &oldversion) == 0); char compat[ZFS_MAXPROPLEN]; if (zpool_get_prop(zhp, ZPOOL_PROP_COMPATIBILITY, compat, ZFS_MAXPROPLEN, NULL, B_FALSE) != 0) compat[0] = '\0'; assert(SPA_VERSION_IS_SUPPORTED(oldversion)); assert(oldversion < version); ret = zfs_iter_root(zpool_get_handle(zhp), check_unsupp_fs, &unsupp_fs); if (ret != 0) return (ret); if (unsupp_fs) { (void) fprintf(stderr, gettext("Upgrade not performed due " "to %d unsupported filesystems (max v%d).\n"), unsupp_fs, (int)ZPL_VERSION); return (1); } if (strcmp(compat, ZPOOL_COMPAT_LEGACY) == 0) { (void) fprintf(stderr, gettext("Upgrade not performed because " "'compatibility' property set to '" ZPOOL_COMPAT_LEGACY "'.\n")); return (1); } ret 
= zpool_upgrade(zhp, version); if (ret != 0) return (ret); if (version >= SPA_VERSION_FEATURES) { (void) printf(gettext("Successfully upgraded " "'%s' from version %llu to feature flags.\n"), zpool_get_name(zhp), (u_longlong_t)oldversion); } else { (void) printf(gettext("Successfully upgraded " "'%s' from version %llu to version %llu.\n"), zpool_get_name(zhp), (u_longlong_t)oldversion, (u_longlong_t)version); } return (0); } static int upgrade_enable_all(zpool_handle_t *zhp, int *countp) { int i, ret, count; boolean_t firstff = B_TRUE; nvlist_t *enabled = zpool_get_features(zhp); char compat[ZFS_MAXPROPLEN]; if (zpool_get_prop(zhp, ZPOOL_PROP_COMPATIBILITY, compat, ZFS_MAXPROPLEN, NULL, B_FALSE) != 0) compat[0] = '\0'; boolean_t requested_features[SPA_FEATURES]; if (zpool_do_load_compat(compat, requested_features) != ZPOOL_COMPATIBILITY_OK) return (-1); count = 0; for (i = 0; i < SPA_FEATURES; i++) { const char *fname = spa_feature_table[i].fi_uname; const char *fguid = spa_feature_table[i].fi_guid; if (!spa_feature_table[i].fi_zfs_mod_supported) continue; if (!nvlist_exists(enabled, fguid) && requested_features[i]) { char *propname; verify(-1 != asprintf(&propname, "feature@%s", fname)); ret = zpool_set_prop(zhp, propname, ZFS_FEATURE_ENABLED); if (ret != 0) { free(propname); return (ret); } count++; if (firstff) { (void) printf(gettext("Enabled the " "following features on '%s':\n"), zpool_get_name(zhp)); firstff = B_FALSE; } (void) printf(gettext(" %s\n"), fname); free(propname); } } if (countp != NULL) *countp = count; return (0); } static int upgrade_cb(zpool_handle_t *zhp, void *arg) { upgrade_cbdata_t *cbp = arg; nvlist_t *config; uint64_t version; boolean_t modified_pool = B_FALSE; int ret; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); assert(SPA_VERSION_IS_SUPPORTED(version)); if (version < cbp->cb_version) { cbp->cb_first = B_FALSE; ret = upgrade_version(zhp, cbp->cb_version); if (ret != 
0) return (ret); modified_pool = B_TRUE; /* * If they did "zpool upgrade -a", then we could * be doing ioctls to different pools. We need * to log this history once to each pool, and bypass * the normal history logging that happens in main(). */ (void) zpool_log_history(g_zfs, history_str); log_history = B_FALSE; } if (cbp->cb_version >= SPA_VERSION_FEATURES) { int count; ret = upgrade_enable_all(zhp, &count); if (ret != 0) return (ret); if (count > 0) { cbp->cb_first = B_FALSE; modified_pool = B_TRUE; } } if (modified_pool) { (void) printf("\n"); (void) after_zpool_upgrade(zhp); } return (0); } static int upgrade_list_older_cb(zpool_handle_t *zhp, void *arg) { upgrade_cbdata_t *cbp = arg; nvlist_t *config; uint64_t version; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); assert(SPA_VERSION_IS_SUPPORTED(version)); if (version < SPA_VERSION_FEATURES) { if (cbp->cb_first) { (void) printf(gettext("The following pools are " "formatted with legacy version numbers and can\n" "be upgraded to use feature flags. 
After " "being upgraded, these pools\nwill no " "longer be accessible by software that does not " "support feature\nflags.\n\n" "Note that setting a pool's 'compatibility' " "feature to '" ZPOOL_COMPAT_LEGACY "' will\n" "inhibit upgrades.\n\n")); (void) printf(gettext("VER POOL\n")); (void) printf(gettext("--- ------------\n")); cbp->cb_first = B_FALSE; } (void) printf("%2llu %s\n", (u_longlong_t)version, zpool_get_name(zhp)); } return (0); } static int upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) { upgrade_cbdata_t *cbp = arg; nvlist_t *config; uint64_t version; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); if (version >= SPA_VERSION_FEATURES) { int i; boolean_t poolfirst = B_TRUE; nvlist_t *enabled = zpool_get_features(zhp); for (i = 0; i < SPA_FEATURES; i++) { const char *fguid = spa_feature_table[i].fi_guid; const char *fname = spa_feature_table[i].fi_uname; if (!spa_feature_table[i].fi_zfs_mod_supported) continue; if (!nvlist_exists(enabled, fguid)) { if (cbp->cb_first) { (void) printf(gettext("\nSome " "supported features are not " "enabled on the following pools. " "Once a\nfeature is enabled the " "pool may become incompatible with " "software\nthat does not support " "the feature. See " "zpool-features(7) for " "details.\n\n" "Note that the pool " "'compatibility' feature can be " "used to inhibit\nfeature " "upgrades.\n\n")); (void) printf(gettext("POOL " "FEATURE\n")); (void) printf(gettext("------" "---------\n")); cbp->cb_first = B_FALSE; } if (poolfirst) { (void) printf(gettext("%s\n"), zpool_get_name(zhp)); poolfirst = B_FALSE; } (void) printf(gettext(" %s\n"), fname); } /* * If they did "zpool upgrade -a", then we could * be doing ioctls to different pools. We need * to log this history once to each pool, and bypass * the normal history logging that happens in main(). 
*/ (void) zpool_log_history(g_zfs, history_str); log_history = B_FALSE; } } return (0); } /* ARGSUSED */ static int upgrade_one(zpool_handle_t *zhp, void *data) { boolean_t modified_pool = B_FALSE; upgrade_cbdata_t *cbp = data; uint64_t cur_version; int ret; if (strcmp("log", zpool_get_name(zhp)) == 0) { (void) fprintf(stderr, gettext("'log' is now a reserved word\n" "Pool 'log' must be renamed using export and import" " to upgrade.\n")); return (1); } cur_version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if (cur_version > cbp->cb_version) { (void) printf(gettext("Pool '%s' is already formatted " "using more current version '%llu'.\n\n"), zpool_get_name(zhp), (u_longlong_t)cur_version); return (0); } if (cbp->cb_version != SPA_VERSION && cur_version == cbp->cb_version) { (void) printf(gettext("Pool '%s' is already formatted " "using version %llu.\n\n"), zpool_get_name(zhp), (u_longlong_t)cbp->cb_version); return (0); } if (cur_version != cbp->cb_version) { modified_pool = B_TRUE; ret = upgrade_version(zhp, cbp->cb_version); if (ret != 0) return (ret); } if (cbp->cb_version >= SPA_VERSION_FEATURES) { int count = 0; ret = upgrade_enable_all(zhp, &count); if (ret != 0) return (ret); if (count != 0) { modified_pool = B_TRUE; } else if (cur_version == SPA_VERSION) { (void) printf(gettext("Pool '%s' already has all " "supported and requested features enabled.\n"), zpool_get_name(zhp)); } } if (modified_pool) { (void) printf("\n"); (void) after_zpool_upgrade(zhp); } return (0); } /* * zpool upgrade * zpool upgrade -v * zpool upgrade [-V version] <-a | pool ...> * * With no arguments, display downrev'd ZFS pool available for upgrade. * Individual pools can be upgraded by specifying the pool, and '-a' will * upgrade all pools. 
*/ int zpool_do_upgrade(int argc, char **argv) { int c; upgrade_cbdata_t cb = { 0 }; int ret = 0; boolean_t showversions = B_FALSE; boolean_t upgradeall = B_FALSE; char *end; /* check options */ while ((c = getopt(argc, argv, ":avV:")) != -1) { switch (c) { case 'a': upgradeall = B_TRUE; break; case 'v': showversions = B_TRUE; break; case 'V': cb.cb_version = strtoll(optarg, &end, 10); if (*end != '\0' || !SPA_VERSION_IS_SUPPORTED(cb.cb_version)) { (void) fprintf(stderr, gettext("invalid version '%s'\n"), optarg); usage(B_FALSE); } break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } cb.cb_argc = argc; cb.cb_argv = argv; argc -= optind; argv += optind; if (cb.cb_version == 0) { cb.cb_version = SPA_VERSION; } else if (!upgradeall && argc == 0) { (void) fprintf(stderr, gettext("-V option is " "incompatible with other arguments\n")); usage(B_FALSE); } if (showversions) { if (upgradeall || argc != 0) { (void) fprintf(stderr, gettext("-v option is " "incompatible with other arguments\n")); usage(B_FALSE); } } else if (upgradeall) { if (argc != 0) { (void) fprintf(stderr, gettext("-a option should not " "be used along with a pool name\n")); usage(B_FALSE); } } (void) printf(gettext("This system supports ZFS pool feature " "flags.\n\n")); if (showversions) { int i; (void) printf(gettext("The following features are " "supported:\n\n")); (void) printf(gettext("FEAT DESCRIPTION\n")); (void) printf("----------------------------------------------" "---------------\n"); for (i = 0; i < SPA_FEATURES; i++) { zfeature_info_t *fi = &spa_feature_table[i]; if (!fi->fi_zfs_mod_supported) continue; const char *ro = (fi->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? 
" (read-only compatible)" : ""; (void) printf("%-37s%s\n", fi->fi_uname, ro); (void) printf(" %s\n", fi->fi_desc); } (void) printf("\n"); (void) printf(gettext("The following legacy versions are also " "supported:\n\n")); (void) printf(gettext("VER DESCRIPTION\n")); (void) printf("--- -----------------------------------------" "---------------\n"); (void) printf(gettext(" 1 Initial ZFS version\n")); (void) printf(gettext(" 2 Ditto blocks " "(replicated metadata)\n")); (void) printf(gettext(" 3 Hot spares and double parity " "RAID-Z\n")); (void) printf(gettext(" 4 zpool history\n")); (void) printf(gettext(" 5 Compression using the gzip " "algorithm\n")); (void) printf(gettext(" 6 bootfs pool property\n")); (void) printf(gettext(" 7 Separate intent log devices\n")); (void) printf(gettext(" 8 Delegated administration\n")); (void) printf(gettext(" 9 refquota and refreservation " "properties\n")); (void) printf(gettext(" 10 Cache devices\n")); (void) printf(gettext(" 11 Improved scrub performance\n")); (void) printf(gettext(" 12 Snapshot properties\n")); (void) printf(gettext(" 13 snapused property\n")); (void) printf(gettext(" 14 passthrough-x aclinherit\n")); (void) printf(gettext(" 15 user/group space accounting\n")); (void) printf(gettext(" 16 stmf property support\n")); (void) printf(gettext(" 17 Triple-parity RAID-Z\n")); (void) printf(gettext(" 18 Snapshot user holds\n")); (void) printf(gettext(" 19 Log device removal\n")); (void) printf(gettext(" 20 Compression using zle " "(zero-length encoding)\n")); (void) printf(gettext(" 21 Deduplication\n")); (void) printf(gettext(" 22 Received properties\n")); (void) printf(gettext(" 23 Slim ZIL\n")); (void) printf(gettext(" 24 System attributes\n")); (void) printf(gettext(" 25 Improved scrub stats\n")); (void) printf(gettext(" 26 Improved snapshot deletion " "performance\n")); (void) printf(gettext(" 27 Improved snapshot creation " "performance\n")); (void) printf(gettext(" 28 Multiple vdev replacements\n")); (void) 
printf(gettext("\nFor more information on a particular " "version, including supported releases,\n")); (void) printf(gettext("see the ZFS Administration Guide.\n\n")); } else if (argc == 0 && upgradeall) { cb.cb_first = B_TRUE; ret = zpool_iter(g_zfs, upgrade_cb, &cb); if (ret == 0 && cb.cb_first) { if (cb.cb_version == SPA_VERSION) { (void) printf(gettext("All pools are already " "formatted using feature flags.\n\n")); (void) printf(gettext("Every feature flags " "pool already has all supported and " "requested features enabled.\n")); } else { (void) printf(gettext("All pools are already " "formatted with version %llu or higher.\n"), (u_longlong_t)cb.cb_version); } } } else if (argc == 0) { cb.cb_first = B_TRUE; ret = zpool_iter(g_zfs, upgrade_list_older_cb, &cb); assert(ret == 0); if (cb.cb_first) { (void) printf(gettext("All pools are formatted " "using feature flags.\n\n")); } else { (void) printf(gettext("\nUse 'zpool upgrade -v' " "for a list of available legacy versions.\n")); } cb.cb_first = B_TRUE; ret = zpool_iter(g_zfs, upgrade_list_disabled_cb, &cb); assert(ret == 0); if (cb.cb_first) { (void) printf(gettext("Every feature flags pool has " "all supported and requested features enabled.\n")); } else { (void) printf(gettext("\n")); } } else { ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE, upgrade_one, &cb); } return (ret); } typedef struct hist_cbdata { boolean_t first; boolean_t longfmt; boolean_t internal; } hist_cbdata_t; static void print_history_records(nvlist_t *nvhis, hist_cbdata_t *cb) { nvlist_t **records; uint_t numrecords; int i; verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, &records, &numrecords) == 0); for (i = 0; i < numrecords; i++) { nvlist_t *rec = records[i]; char tbuf[64] = ""; if (nvlist_exists(rec, ZPOOL_HIST_TIME)) { time_t tsec; struct tm t; tsec = fnvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME); (void) localtime_r(&tsec, &t); (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); } if (nvlist_exists(rec, 
ZPOOL_HIST_ELAPSED_NS)) { uint64_t elapsed_ns = fnvlist_lookup_int64(records[i], ZPOOL_HIST_ELAPSED_NS); (void) snprintf(tbuf + strlen(tbuf), sizeof (tbuf) - strlen(tbuf), " (%lldms)", (long long)elapsed_ns / 1000 / 1000); } if (nvlist_exists(rec, ZPOOL_HIST_CMD)) { (void) printf("%s %s", tbuf, fnvlist_lookup_string(rec, ZPOOL_HIST_CMD)); } else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) { int ievent = fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT); if (!cb->internal) continue; if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) { (void) printf("%s unrecognized record:\n", tbuf); dump_nvlist(rec, 4); continue; } (void) printf("%s [internal %s txg:%lld] %s", tbuf, zfs_history_event_names[ievent], (longlong_t)fnvlist_lookup_uint64( rec, ZPOOL_HIST_TXG), fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); } else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) { if (!cb->internal) continue; (void) printf("%s [txg:%lld] %s", tbuf, (longlong_t)fnvlist_lookup_uint64( rec, ZPOOL_HIST_TXG), fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME)); if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) { (void) printf(" %s (%llu)", fnvlist_lookup_string(rec, ZPOOL_HIST_DSNAME), (u_longlong_t)fnvlist_lookup_uint64(rec, ZPOOL_HIST_DSID)); } (void) printf(" %s", fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); } else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) { if (!cb->internal) continue; (void) printf("%s ioctl %s\n", tbuf, fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL)); if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) { (void) printf(" input:\n"); dump_nvlist(fnvlist_lookup_nvlist(rec, ZPOOL_HIST_INPUT_NVL), 8); } if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) { (void) printf(" output:\n"); dump_nvlist(fnvlist_lookup_nvlist(rec, ZPOOL_HIST_OUTPUT_NVL), 8); } if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_SIZE)) { (void) printf(" output nvlist omitted; " "original size: %lldKB\n", (longlong_t)fnvlist_lookup_int64(rec, ZPOOL_HIST_OUTPUT_SIZE) / 1024); } if (nvlist_exists(rec, ZPOOL_HIST_ERRNO)) { (void) printf(" 
errno: %lld\n", (longlong_t)fnvlist_lookup_int64(rec, ZPOOL_HIST_ERRNO)); } } else { if (!cb->internal) continue; (void) printf("%s unrecognized record:\n", tbuf); dump_nvlist(rec, 4); } if (!cb->longfmt) { (void) printf("\n"); continue; } (void) printf(" ["); if (nvlist_exists(rec, ZPOOL_HIST_WHO)) { uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO); struct passwd *pwd = getpwuid(who); (void) printf("user %d ", (int)who); if (pwd != NULL) (void) printf("(%s) ", pwd->pw_name); } if (nvlist_exists(rec, ZPOOL_HIST_HOST)) { (void) printf("on %s", fnvlist_lookup_string(rec, ZPOOL_HIST_HOST)); } if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) { (void) printf(":%s", fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE)); } (void) printf("]"); (void) printf("\n"); } } /* * Print out the command history for a specific pool. */ static int get_history_one(zpool_handle_t *zhp, void *data) { nvlist_t *nvhis; int ret; hist_cbdata_t *cb = (hist_cbdata_t *)data; uint64_t off = 0; boolean_t eof = B_FALSE; cb->first = B_FALSE; (void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp)); while (!eof) { if ((ret = zpool_get_history(zhp, &nvhis, &off, &eof)) != 0) return (ret); print_history_records(nvhis, cb); nvlist_free(nvhis); } (void) printf("\n"); return (ret); } /* * zpool history * * Displays the history of commands that modified pools. 
*/ int zpool_do_history(int argc, char **argv) { hist_cbdata_t cbdata = { 0 }; int ret; int c; cbdata.first = B_TRUE; /* check options */ while ((c = getopt(argc, argv, "li")) != -1) { switch (c) { case 'l': cbdata.longfmt = B_TRUE; break; case 'i': cbdata.internal = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE, get_history_one, &cbdata); if (argc == 0 && cbdata.first == B_TRUE) { (void) fprintf(stderr, gettext("no pools available\n")); return (0); } return (ret); } typedef struct ev_opts { int verbose; int scripted; int follow; int clear; char poolname[ZFS_MAX_DATASET_NAME_LEN]; } ev_opts_t; static void zpool_do_events_short(nvlist_t *nvl, ev_opts_t *opts) { char ctime_str[26], str[32], *ptr; int64_t *tv; uint_t n; verify(nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tv, &n) == 0); memset(str, ' ', 32); (void) ctime_r((const time_t *)&tv[0], ctime_str); (void) memcpy(str, ctime_str+4, 6); /* 'Jun 30' */ (void) memcpy(str+7, ctime_str+20, 4); /* '1993' */ (void) memcpy(str+12, ctime_str+11, 8); /* '21:49:08' */ (void) sprintf(str+20, ".%09lld", (longlong_t)tv[1]); /* '.123456789' */ if (opts->scripted) (void) printf(gettext("%s\t"), str); else (void) printf(gettext("%s "), str); verify(nvlist_lookup_string(nvl, FM_CLASS, &ptr) == 0); (void) printf(gettext("%s\n"), ptr); } static void zpool_do_events_nvprint(nvlist_t *nvl, int depth) { nvpair_t *nvp; for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { data_type_t type = nvpair_type(nvp); const char *name = nvpair_name(nvp); boolean_t b; uint8_t i8; uint16_t i16; uint32_t i32; uint64_t i64; char *str; nvlist_t *cnv; printf(gettext("%*s%s = "), depth, "", name); switch (type) { case DATA_TYPE_BOOLEAN: printf(gettext("%s"), "1"); break; case DATA_TYPE_BOOLEAN_VALUE: (void) nvpair_value_boolean_value(nvp, &b); 
printf(gettext("%s"), b ? "1" : "0"); break; case DATA_TYPE_BYTE: (void) nvpair_value_byte(nvp, &i8); printf(gettext("0x%x"), i8); break; case DATA_TYPE_INT8: (void) nvpair_value_int8(nvp, (void *)&i8); printf(gettext("0x%x"), i8); break; case DATA_TYPE_UINT8: (void) nvpair_value_uint8(nvp, &i8); printf(gettext("0x%x"), i8); break; case DATA_TYPE_INT16: (void) nvpair_value_int16(nvp, (void *)&i16); printf(gettext("0x%x"), i16); break; case DATA_TYPE_UINT16: (void) nvpair_value_uint16(nvp, &i16); printf(gettext("0x%x"), i16); break; case DATA_TYPE_INT32: (void) nvpair_value_int32(nvp, (void *)&i32); printf(gettext("0x%x"), i32); break; case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &i32); printf(gettext("0x%x"), i32); break; case DATA_TYPE_INT64: (void) nvpair_value_int64(nvp, (void *)&i64); printf(gettext("0x%llx"), (u_longlong_t)i64); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &i64); /* * translate vdev state values to readable * strings to aide zpool events consumers */ if (strcmp(name, FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE) == 0 || strcmp(name, FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE) == 0) { printf(gettext("\"%s\" (0x%llx)"), zpool_state_to_name(i64, VDEV_AUX_NONE), (u_longlong_t)i64); } else { printf(gettext("0x%llx"), (u_longlong_t)i64); } break; case DATA_TYPE_HRTIME: (void) nvpair_value_hrtime(nvp, (void *)&i64); printf(gettext("0x%llx"), (u_longlong_t)i64); break; case DATA_TYPE_STRING: (void) nvpair_value_string(nvp, &str); printf(gettext("\"%s\""), str ? 
str : ""); break; case DATA_TYPE_NVLIST: printf(gettext("(embedded nvlist)\n")); (void) nvpair_value_nvlist(nvp, &cnv); zpool_do_events_nvprint(cnv, depth + 8); printf(gettext("%*s(end %s)"), depth, "", name); break; case DATA_TYPE_NVLIST_ARRAY: { nvlist_t **val; uint_t i, nelem; (void) nvpair_value_nvlist_array(nvp, &val, &nelem); printf(gettext("(%d embedded nvlists)\n"), nelem); for (i = 0; i < nelem; i++) { printf(gettext("%*s%s[%d] = %s\n"), depth, "", name, i, "(embedded nvlist)"); zpool_do_events_nvprint(val[i], depth + 8); printf(gettext("%*s(end %s[%i])\n"), depth, "", name, i); } printf(gettext("%*s(end %s)\n"), depth, "", name); } break; case DATA_TYPE_INT8_ARRAY: { int8_t *val; uint_t i, nelem; (void) nvpair_value_int8_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_UINT8_ARRAY: { uint8_t *val; uint_t i, nelem; (void) nvpair_value_uint8_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_INT16_ARRAY: { int16_t *val; uint_t i, nelem; (void) nvpair_value_int16_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_UINT16_ARRAY: { uint16_t *val; uint_t i, nelem; (void) nvpair_value_uint16_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_INT32_ARRAY: { int32_t *val; uint_t i, nelem; (void) nvpair_value_int32_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_UINT32_ARRAY: { uint32_t *val; uint_t i, nelem; (void) nvpair_value_uint32_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_INT64_ARRAY: { int64_t *val; uint_t i, nelem; (void) nvpair_value_int64_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%llx "), (u_longlong_t)val[i]); break; } case DATA_TYPE_UINT64_ARRAY: { 
uint64_t *val; uint_t i, nelem; (void) nvpair_value_uint64_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%llx "), (u_longlong_t)val[i]); break; } case DATA_TYPE_STRING_ARRAY: { char **str; uint_t i, nelem; (void) nvpair_value_string_array(nvp, &str, &nelem); for (i = 0; i < nelem; i++) printf(gettext("\"%s\" "), str[i] ? str[i] : ""); break; } case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_BYTE_ARRAY: case DATA_TYPE_DOUBLE: case DATA_TYPE_DONTCARE: case DATA_TYPE_UNKNOWN: printf(gettext("")); break; } printf(gettext("\n")); } } static int zpool_do_events_next(ev_opts_t *opts) { nvlist_t *nvl; int zevent_fd, ret, dropped; char *pool; zevent_fd = open(ZFS_DEV, O_RDWR); VERIFY(zevent_fd >= 0); if (!opts->scripted) (void) printf(gettext("%-30s %s\n"), "TIME", "CLASS"); while (1) { ret = zpool_events_next(g_zfs, &nvl, &dropped, (opts->follow ? ZEVENT_NONE : ZEVENT_NONBLOCK), zevent_fd); if (ret || nvl == NULL) break; if (dropped > 0) (void) printf(gettext("dropped %d events\n"), dropped); if (strlen(opts->poolname) > 0 && nvlist_lookup_string(nvl, FM_FMRI_ZFS_POOL, &pool) == 0 && strcmp(opts->poolname, pool) != 0) continue; zpool_do_events_short(nvl, opts); if (opts->verbose) { zpool_do_events_nvprint(nvl, 8); printf(gettext("\n")); } (void) fflush(stdout); nvlist_free(nvl); } VERIFY(0 == close(zevent_fd)); return (ret); } static int zpool_do_events_clear(ev_opts_t *opts) { int count, ret; ret = zpool_events_clear(g_zfs, &count); if (!ret) (void) printf(gettext("cleared %d events\n"), count); return (ret); } /* * zpool events [-vHf [pool] | -c] * * Displays events logs by ZFS. 
*/ int zpool_do_events(int argc, char **argv) { ev_opts_t opts = { 0 }; int ret; int c; /* check options */ while ((c = getopt(argc, argv, "vHfc")) != -1) { switch (c) { case 'v': opts.verbose = 1; break; case 'H': opts.scripted = 1; break; case 'f': opts.follow = 1; break; case 'c': opts.clear = 1; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } else if (argc == 1) { (void) strlcpy(opts.poolname, argv[0], sizeof (opts.poolname)); if (!zfs_name_valid(opts.poolname, ZFS_TYPE_POOL)) { (void) fprintf(stderr, gettext("invalid pool name '%s'\n"), opts.poolname); usage(B_FALSE); } } if ((argc == 1 || opts.verbose || opts.scripted || opts.follow) && opts.clear) { (void) fprintf(stderr, gettext("invalid options combined with -c\n")); usage(B_FALSE); } if (opts.clear) ret = zpool_do_events_clear(&opts); else ret = zpool_do_events_next(&opts); return (ret); } static int get_callback(zpool_handle_t *zhp, void *data) { zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; char value[MAXNAMELEN]; zprop_source_t srctype; zprop_list_t *pl; for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { /* * Skip the special fake placeholder. This will also skip * over the name property when 'all' is specified. 
*/ if (pl->pl_prop == ZPOOL_PROP_NAME && pl == cbp->cb_proplist) continue; if (pl->pl_prop == ZPROP_INVAL && (zpool_prop_feature(pl->pl_user_prop) || zpool_prop_unsupported(pl->pl_user_prop))) { srctype = ZPROP_SRC_LOCAL; if (zpool_prop_get_feature(zhp, pl->pl_user_prop, value, sizeof (value)) == 0) { zprop_print_one_property(zpool_get_name(zhp), cbp, pl->pl_user_prop, value, srctype, NULL, NULL); } } else { if (zpool_get_prop(zhp, pl->pl_prop, value, sizeof (value), &srctype, cbp->cb_literal) != 0) continue; zprop_print_one_property(zpool_get_name(zhp), cbp, zpool_prop_to_name(pl->pl_prop), value, srctype, NULL, NULL); } } return (0); } /* * zpool get [-Hp] [-o "all" | field[,...]] <"all" | property[,...]> ... * * -H Scripted mode. Don't display headers, and separate properties * by a single tab. * -o List of columns to display. Defaults to * "name,property,value,source". * -p Display values in parsable (exact) format. * * Get properties of pools in the system. Output space statistics * for each one as well as other attributes. */ int zpool_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; zprop_list_t fake_name = { 0 }; int ret; int c, i; char *value; cb.cb_first = B_TRUE; /* * Set up default columns and sources. 
*/ cb.cb_sources = ZPROP_SRC_ALL; cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_POOL; /* check options */ while ((c = getopt(argc, argv, ":Hpo:")) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; break; case 'H': cb.cb_scripted = B_TRUE; break; case 'o': bzero(&cb.cb_columns, sizeof (cb.cb_columns)); i = 0; while (*optarg != '\0') { static char *col_subopts[] = { "name", "property", "value", "source", "all", NULL }; if (i == ZFS_GET_NCOLS) { (void) fprintf(stderr, gettext("too " "many fields given to -o " "option\n")); usage(B_FALSE); } switch (getsubopt(&optarg, col_subopts, &value)) { case 0: cb.cb_columns[i++] = GET_COL_NAME; break; case 1: cb.cb_columns[i++] = GET_COL_PROPERTY; break; case 2: cb.cb_columns[i++] = GET_COL_VALUE; break; case 3: cb.cb_columns[i++] = GET_COL_SOURCE; break; case 4: if (i > 0) { (void) fprintf(stderr, gettext("\"all\" conflicts " "with specific fields " "given to -o option\n")); usage(B_FALSE); } cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_SOURCE; i = ZFS_GET_NCOLS; break; default: (void) fprintf(stderr, gettext("invalid column name " "'%s'\n"), value); usage(B_FALSE); } } break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing property " "argument\n")); usage(B_FALSE); } if (zprop_get_list(g_zfs, argv[0], &cb.cb_proplist, ZFS_TYPE_POOL) != 0) usage(B_FALSE); argc--; argv++; if (cb.cb_proplist != NULL) { fake_name.pl_prop = ZPOOL_PROP_NAME; fake_name.pl_width = strlen(gettext("NAME")); fake_name.pl_next = cb.cb_proplist; cb.cb_proplist = &fake_name; } ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, cb.cb_literal, get_callback, &cb); if (cb.cb_proplist == &fake_name) 
zprop_free_list(fake_name.pl_next); else zprop_free_list(cb.cb_proplist); return (ret); } typedef struct set_cbdata { char *cb_propname; char *cb_value; boolean_t cb_any_successful; } set_cbdata_t; static int set_callback(zpool_handle_t *zhp, void *data) { int error; set_cbdata_t *cb = (set_cbdata_t *)data; /* Check if we have out-of-bounds features */ if (strcmp(cb->cb_propname, ZPOOL_CONFIG_COMPATIBILITY) == 0) { boolean_t features[SPA_FEATURES]; if (zpool_do_load_compat(cb->cb_value, features) != ZPOOL_COMPATIBILITY_OK) return (-1); nvlist_t *enabled = zpool_get_features(zhp); spa_feature_t i; for (i = 0; i < SPA_FEATURES; i++) { const char *fguid = spa_feature_table[i].fi_guid; if (nvlist_exists(enabled, fguid) && !features[i]) break; } if (i < SPA_FEATURES) (void) fprintf(stderr, gettext("Warning: one or " "more features already enabled on pool '%s'\n" "are not present in this compatibility set.\n"), zpool_get_name(zhp)); } /* if we're setting a feature, check it's in compatibility set */ if (zpool_prop_feature(cb->cb_propname) && strcmp(cb->cb_value, ZFS_FEATURE_ENABLED) == 0) { char *fname = strchr(cb->cb_propname, '@') + 1; spa_feature_t f; if (zfeature_lookup_name(fname, &f) == 0) { char compat[ZFS_MAXPROPLEN]; if (zpool_get_prop(zhp, ZPOOL_PROP_COMPATIBILITY, compat, ZFS_MAXPROPLEN, NULL, B_FALSE) != 0) compat[0] = '\0'; boolean_t features[SPA_FEATURES]; if (zpool_do_load_compat(compat, features) != ZPOOL_COMPATIBILITY_OK) { (void) fprintf(stderr, gettext("Error: " "cannot enable feature '%s' on pool '%s'\n" "because the pool's 'compatibility' " "property cannot be parsed.\n"), fname, zpool_get_name(zhp)); return (-1); } if (!features[f]) { (void) fprintf(stderr, gettext("Error: " "cannot enable feature '%s' on pool '%s'\n" "as it is not specified in this pool's " "current compatibility set.\n" "Consider setting 'compatibility' to a " "less restrictive set, or to 'off'.\n"), fname, zpool_get_name(zhp)); return (-1); } } } error = zpool_set_prop(zhp, 
cb->cb_propname, cb->cb_value); if (!error) cb->cb_any_successful = B_TRUE; return (error); } int zpool_do_set(int argc, char **argv) { set_cbdata_t cb = { 0 }; int error; if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing property=value " "argument\n")); usage(B_FALSE); } if (argc < 3) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc > 3) { (void) fprintf(stderr, gettext("too many pool names\n")); usage(B_FALSE); } cb.cb_propname = argv[1]; cb.cb_value = strchr(cb.cb_propname, '='); if (cb.cb_value == NULL) { (void) fprintf(stderr, gettext("missing value in " "property=value argument\n")); usage(B_FALSE); } *(cb.cb_value) = '\0'; cb.cb_value++; error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL, B_FALSE, set_callback, &cb); return (error); } /* Add up the total number of bytes left to initialize/trim across all vdevs */ static uint64_t vdev_activity_remaining(nvlist_t *nv, zpool_wait_activity_t activity) { uint64_t bytes_remaining; nvlist_t **child; uint_t c, children; vdev_stat_t *vs; assert(activity == ZPOOL_WAIT_INITIALIZE || activity == ZPOOL_WAIT_TRIM); verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); if (activity == ZPOOL_WAIT_INITIALIZE && vs->vs_initialize_state == VDEV_INITIALIZE_ACTIVE) bytes_remaining = vs->vs_initialize_bytes_est - vs->vs_initialize_bytes_done; else if (activity == ZPOOL_WAIT_TRIM && vs->vs_trim_state == VDEV_TRIM_ACTIVE) bytes_remaining = vs->vs_trim_bytes_est - vs->vs_trim_bytes_done; else bytes_remaining = 0; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) children = 0; for (c = 0; c < children; c++) bytes_remaining += vdev_activity_remaining(child[c], activity); return (bytes_remaining); } /* Add up the total number of bytes left to rebuild across top-level vdevs */ static uint64_t 
vdev_activity_top_remaining(nvlist_t *nv) { uint64_t bytes_remaining = 0; nvlist_t **child; uint_t children; int error; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) children = 0; for (uint_t c = 0; c < children; c++) { vdev_rebuild_stat_t *vrs; uint_t i; error = nvlist_lookup_uint64_array(child[c], ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i); if (error == 0) { if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) { bytes_remaining += (vrs->vrs_bytes_est - vrs->vrs_bytes_rebuilt); } } } return (bytes_remaining); } /* Whether any vdevs are 'spare' or 'replacing' vdevs */ static boolean_t vdev_any_spare_replacing(nvlist_t *nv) { nvlist_t **child; uint_t c, children; char *vdev_type; (void) nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &vdev_type); if (strcmp(vdev_type, VDEV_TYPE_REPLACING) == 0 || strcmp(vdev_type, VDEV_TYPE_SPARE) == 0 || strcmp(vdev_type, VDEV_TYPE_DRAID_SPARE) == 0) { return (B_TRUE); } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) children = 0; for (c = 0; c < children; c++) { if (vdev_any_spare_replacing(child[c])) return (B_TRUE); } return (B_FALSE); } typedef struct wait_data { char *wd_poolname; boolean_t wd_scripted; boolean_t wd_exact; boolean_t wd_headers_once; boolean_t wd_should_exit; /* Which activities to wait for */ boolean_t wd_enabled[ZPOOL_WAIT_NUM_ACTIVITIES]; float wd_interval; pthread_cond_t wd_cv; pthread_mutex_t wd_mutex; } wait_data_t; /* * Print to stdout a single line, containing one column for each activity that * we are waiting for specifying how many bytes of work are left for that * activity. 
*/ static void print_wait_status_row(wait_data_t *wd, zpool_handle_t *zhp, int row) { nvlist_t *config, *nvroot; uint_t c; int i; pool_checkpoint_stat_t *pcs = NULL; pool_scan_stat_t *pss = NULL; pool_removal_stat_t *prs = NULL; char *headers[] = {"DISCARD", "FREE", "INITIALIZE", "REPLACE", "REMOVE", "RESILVER", "SCRUB", "TRIM"}; int col_widths[ZPOOL_WAIT_NUM_ACTIVITIES]; /* Calculate the width of each column */ for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) { /* * Make sure we have enough space in the col for pretty-printed * numbers and for the column header, and then leave a couple * spaces between cols for readability. */ col_widths[i] = MAX(strlen(headers[i]), 6) + 2; } /* Print header if appropriate */ int term_height = terminal_height(); boolean_t reprint_header = (!wd->wd_headers_once && term_height > 0 && row % (term_height-1) == 0); if (!wd->wd_scripted && (row == 0 || reprint_header)) { for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) { if (wd->wd_enabled[i]) (void) printf("%*s", col_widths[i], headers[i]); } (void) printf("\n"); } /* Bytes of work remaining in each activity */ int64_t bytes_rem[ZPOOL_WAIT_NUM_ACTIVITIES] = {0}; bytes_rem[ZPOOL_WAIT_FREE] = zpool_get_prop_int(zhp, ZPOOL_PROP_FREEING, NULL); config = zpool_get_config(zhp, NULL); nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); if (pcs != NULL && pcs->pcs_state == CS_CHECKPOINT_DISCARDING) bytes_rem[ZPOOL_WAIT_CKPT_DISCARD] = pcs->pcs_space; (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c); if (prs != NULL && prs->prs_state == DSS_SCANNING) bytes_rem[ZPOOL_WAIT_REMOVE] = prs->prs_to_copy - prs->prs_copied; (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&pss, &c); if (pss != NULL && pss->pss_state == DSS_SCANNING && pss->pss_pass_scrub_pause == 0) { int64_t rem = pss->pss_to_examine - pss->pss_issued; if 
(pss->pss_func == POOL_SCAN_SCRUB) bytes_rem[ZPOOL_WAIT_SCRUB] = rem; else bytes_rem[ZPOOL_WAIT_RESILVER] = rem; } else if (check_rebuilding(nvroot, NULL)) { bytes_rem[ZPOOL_WAIT_RESILVER] = vdev_activity_top_remaining(nvroot); } bytes_rem[ZPOOL_WAIT_INITIALIZE] = vdev_activity_remaining(nvroot, ZPOOL_WAIT_INITIALIZE); bytes_rem[ZPOOL_WAIT_TRIM] = vdev_activity_remaining(nvroot, ZPOOL_WAIT_TRIM); /* * A replace finishes after resilvering finishes, so the amount of work * left for a replace is the same as for resilvering. * * It isn't quite correct to say that if we have any 'spare' or * 'replacing' vdevs and a resilver is happening, then a replace is in * progress, like we do here. When a hot spare is used, the faulted vdev * is not removed after the hot spare is resilvered, so parent 'spare' * vdev is not removed either. So we could have a 'spare' vdev, but be * resilvering for a different reason. However, we use it as a heuristic * because we don't have access to the DTLs, which could tell us whether * or not we have really finished resilvering a hot spare. */ if (vdev_any_spare_replacing(nvroot)) bytes_rem[ZPOOL_WAIT_REPLACE] = bytes_rem[ZPOOL_WAIT_RESILVER]; if (timestamp_fmt != NODATE) print_timestamp(timestamp_fmt); for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) { char buf[64]; if (!wd->wd_enabled[i]) continue; if (wd->wd_exact) (void) snprintf(buf, sizeof (buf), "%" PRIi64, bytes_rem[i]); else zfs_nicenum(bytes_rem[i], buf, sizeof (buf)); if (wd->wd_scripted) (void) printf(i == 0 ? 
"%s" : "\t%s", buf); else (void) printf(" %*s", col_widths[i] - 1, buf); } (void) printf("\n"); (void) fflush(stdout); } static void * wait_status_thread(void *arg) { wait_data_t *wd = (wait_data_t *)arg; zpool_handle_t *zhp; if ((zhp = zpool_open(g_zfs, wd->wd_poolname)) == NULL) return (void *)(1); for (int row = 0; ; row++) { boolean_t missing; struct timespec timeout; int ret = 0; (void) clock_gettime(CLOCK_REALTIME, &timeout); if (zpool_refresh_stats(zhp, &missing) != 0 || missing || zpool_props_refresh(zhp) != 0) { zpool_close(zhp); return (void *)(uintptr_t)(missing ? 0 : 1); } print_wait_status_row(wd, zhp, row); timeout.tv_sec += floor(wd->wd_interval); long nanos = timeout.tv_nsec + (wd->wd_interval - floor(wd->wd_interval)) * NANOSEC; if (nanos >= NANOSEC) { timeout.tv_sec++; timeout.tv_nsec = nanos - NANOSEC; } else { timeout.tv_nsec = nanos; } pthread_mutex_lock(&wd->wd_mutex); if (!wd->wd_should_exit) ret = pthread_cond_timedwait(&wd->wd_cv, &wd->wd_mutex, &timeout); pthread_mutex_unlock(&wd->wd_mutex); if (ret == 0) { break; /* signaled by main thread */ } else if (ret != ETIMEDOUT) { (void) fprintf(stderr, gettext("pthread_cond_timedwait " "failed: %s\n"), strerror(ret)); zpool_close(zhp); return (void *)(uintptr_t)(1); } } zpool_close(zhp); return (void *)(0); } int zpool_do_wait(int argc, char **argv) { boolean_t verbose = B_FALSE; int c; char *value; int i; unsigned long count; pthread_t status_thr; int error = 0; zpool_handle_t *zhp; wait_data_t wd; wd.wd_scripted = B_FALSE; wd.wd_exact = B_FALSE; wd.wd_headers_once = B_FALSE; wd.wd_should_exit = B_FALSE; pthread_mutex_init(&wd.wd_mutex, NULL); pthread_cond_init(&wd.wd_cv, NULL); /* By default, wait for all types of activity. 
*/ for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) wd.wd_enabled[i] = B_TRUE; while ((c = getopt(argc, argv, "HpT:t:")) != -1) { switch (c) { case 'H': wd.wd_scripted = B_TRUE; break; case 'n': wd.wd_headers_once = B_TRUE; break; case 'p': wd.wd_exact = B_TRUE; break; case 'T': get_timestamp_arg(*optarg); break; case 't': { static char *col_subopts[] = { "discard", "free", "initialize", "replace", "remove", "resilver", "scrub", "trim", NULL }; /* Reset activities array */ bzero(&wd.wd_enabled, sizeof (wd.wd_enabled)); while (*optarg != '\0') { int activity = getsubopt(&optarg, col_subopts, &value); if (activity < 0) { (void) fprintf(stderr, gettext("invalid activity '%s'\n"), value); usage(B_FALSE); } wd.wd_enabled[activity] = B_TRUE; } break; } case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; get_interval_count(&argc, argv, &wd.wd_interval, &count); if (count != 0) { /* This subcmd only accepts an interval, not a count */ (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (wd.wd_interval != 0) verbose = B_TRUE; if (argc < 1) { (void) fprintf(stderr, gettext("missing 'pool' argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } wd.wd_poolname = argv[0]; if ((zhp = zpool_open(g_zfs, wd.wd_poolname)) == NULL) return (1); if (verbose) { /* * We use a separate thread for printing status updates because * the main thread will call lzc_wait(), which blocks as long * as an activity is in progress, which can be a long time. */ if (pthread_create(&status_thr, NULL, wait_status_thread, &wd) != 0) { (void) fprintf(stderr, gettext("failed to create status" "thread: %s\n"), strerror(errno)); zpool_close(zhp); return (1); } } /* * Loop over all activities that we are supposed to wait for until none * of them are in progress. 
Note that this means we can end up waiting * for more activities to complete than just those that were in progress * when we began waiting; if an activity we are interested in begins * while we are waiting for another activity, we will wait for both to * complete before exiting. */ for (;;) { boolean_t missing = B_FALSE; boolean_t any_waited = B_FALSE; for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) { boolean_t waited; if (!wd.wd_enabled[i]) continue; error = zpool_wait_status(zhp, i, &missing, &waited); if (error != 0 || missing) break; any_waited = (any_waited || waited); } if (error != 0 || missing || !any_waited) break; } zpool_close(zhp); if (verbose) { uintptr_t status; pthread_mutex_lock(&wd.wd_mutex); wd.wd_should_exit = B_TRUE; pthread_cond_signal(&wd.wd_cv); pthread_mutex_unlock(&wd.wd_mutex); (void) pthread_join(status_thr, (void *)&status); if (status != 0) error = status; } pthread_mutex_destroy(&wd.wd_mutex); pthread_cond_destroy(&wd.wd_cv); return (error); } static int find_command_idx(char *command, int *idx) { int i; for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) continue; if (strcmp(command, command_table[i].name) == 0) { *idx = i; return (0); } } return (1); } /* * Display version message */ static int zpool_do_version(int argc, char **argv) { if (zfs_version_print() == -1) return (1); return (0); } /* * Do zpool_load_compat() and print error message on failure */ static zpool_compat_status_t zpool_do_load_compat(const char *compat, boolean_t *list) { char report[1024]; zpool_compat_status_t ret; ret = zpool_load_compat(compat, list, report, 1024); switch (ret) { case ZPOOL_COMPATIBILITY_OK: break; case ZPOOL_COMPATIBILITY_NOFILES: case ZPOOL_COMPATIBILITY_BADFILE: case ZPOOL_COMPATIBILITY_BADTOKEN: (void) fprintf(stderr, "Error: %s\n", report); break; case ZPOOL_COMPATIBILITY_WARNTOKEN: (void) fprintf(stderr, "Warning: %s\n", report); ret = ZPOOL_COMPATIBILITY_OK; break; } return (ret); } int main(int argc, char **argv) { 
int ret = 0; int i = 0; char *cmdname; char **newargv; (void) setlocale(LC_ALL, ""); (void) setlocale(LC_NUMERIC, "C"); (void) textdomain(TEXT_DOMAIN); srand(time(NULL)); opterr = 0; /* * Make sure the user has specified some command. */ if (argc < 2) { (void) fprintf(stderr, gettext("missing command\n")); usage(B_FALSE); } cmdname = argv[1]; /* * Special case '-?' */ if ((strcmp(cmdname, "-?") == 0) || strcmp(cmdname, "--help") == 0) usage(B_TRUE); /* * Special case '-V|--version' */ if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0)) return (zpool_do_version(argc, argv)); if ((g_zfs = libzfs_init()) == NULL) { (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (1); } libzfs_print_on_error(g_zfs, B_TRUE); zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); /* * Many commands modify input strings for string parsing reasons. * We create a copy to protect the original argv. */ newargv = malloc((argc + 1) * sizeof (newargv[0])); for (i = 0; i < argc; i++) newargv[i] = strdup(argv[i]); newargv[argc] = NULL; /* * Run the appropriate command. */ if (find_command_idx(cmdname, &i) == 0) { current_command = &command_table[i]; ret = command_table[i].func(argc - 1, newargv + 1); } else if (strchr(cmdname, '=')) { verify(find_command_idx("set", &i) == 0); current_command = &command_table[i]; ret = command_table[i].func(argc, newargv); } else if (strcmp(cmdname, "freeze") == 0 && argc == 3) { /* * 'freeze' is a vile debugging abomination, so we treat * it as such. 
*/ zfs_cmd_t zc = {"\0"}; (void) strlcpy(zc.zc_name, argv[2], sizeof (zc.zc_name)); ret = zfs_ioctl(g_zfs, ZFS_IOC_POOL_FREEZE, &zc); if (ret != 0) { (void) fprintf(stderr, gettext("failed to freeze pool: %d\n"), errno); ret = 1; } log_history = 0; } else { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); usage(B_FALSE); ret = 1; } for (i = 0; i < argc; i++) free(newargv[i]); free(newargv); if (ret == 0 && log_history) (void) zpool_log_history(g_zfs, history_str); libzfs_fini(g_zfs); /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } return (ret); } diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index 9e9c9c525331..8fb389d6113f 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -1,142 +1,145 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #ifndef ZPOOL_UTIL_H #define ZPOOL_UTIL_H #include #include #include #ifdef __cplusplus extern "C" { #endif /* Path to scripts you can run with "zpool status/iostat -c" */ #define ZPOOL_SCRIPTS_DIR SYSCONFDIR"/zfs/zpool.d" /* * Basic utility functions */ void *safe_malloc(size_t); void zpool_no_memory(void); uint_t num_logs(nvlist_t *nv); uint64_t array64_max(uint64_t array[], unsigned int len); int highbit64(uint64_t i); int lowbit64(uint64_t i); /* * Misc utility functions */ char *zpool_get_cmd_search_path(void); /* * Virtual device functions */ nvlist_t *make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, boolean_t replacing, boolean_t dryrun, int argc, char **argv); nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, splitflags_t flags, int argc, char **argv); /* * Pool list functions */ int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **, boolean_t, zpool_iter_f, void *); /* Vdev list functions */ int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data); typedef struct zpool_list zpool_list_t; zpool_list_t *pool_list_get(int, char **, zprop_list_t **, boolean_t, int *); void pool_list_update(zpool_list_t *); int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *); void pool_list_free(zpool_list_t *); int pool_list_count(zpool_list_t *); void pool_list_remove(zpool_list_t *, zpool_handle_t *); extern libzfs_handle_t *g_zfs; typedef struct vdev_cmd_data { char **lines; /* Array of lines of output, minus the column name */ int lines_cnt; /* Number of lines in the array */ char **cols; /* Array of column names */ int cols_cnt; /* Number of column names */ char *path; /* vdev path */ char *upath; /* vdev underlying path */ char *pool; /* Pool name */ char *cmd; /* backpointer to cmd */ char *vdev_enc_sysfs_path; /* enclosure sysfs path (if any) */ } vdev_cmd_data_t; typedef struct vdev_cmd_data_list { char *cmd; /* Command to run */ unsigned int count; /* 
Number of vdev_cmd_data items (vdevs) */ /* fields used to select only certain vdevs, if requested */ libzfs_handle_t *g_zfs; char **vdev_names; int vdev_names_count; int cb_name_flags; vdev_cmd_data_t *data; /* Array of vdevs */ /* List of unique column names and widths */ char **uniq_cols; int uniq_cols_cnt; int *uniq_cols_width; } vdev_cmd_data_list_t; vdev_cmd_data_list_t *all_pools_for_each_vdev_run(int argc, char **argv, char *cmd, libzfs_handle_t *g_zfs, char **vdev_names, int vdev_names_count, int cb_name_flags); void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl); void free_vdev_cmd_data(vdev_cmd_data_t *data); int vdev_run_cmd_simple(char *path, char *cmd); int check_device(const char *path, boolean_t force, boolean_t isspare, boolean_t iswholedisk); boolean_t check_sector_size_database(char *path, int *sector_size); void vdev_error(const char *fmt, ...); int check_file(const char *file, boolean_t force, boolean_t isspare); void after_zpool_upgrade(zpool_handle_t *zhp); +int zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on); +int zpool_power_current_state(zpool_handle_t *zhp, char *vdev); + #ifdef __cplusplus } #endif #endif /* ZPOOL_UTIL_H */ diff --git a/include/libzfs.h b/include/libzfs.h index c7ebc52fe9fb..6c335bbc4af9 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -1,968 +1,970 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright Joyent, Inc. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2016, Intel Corporation. * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2021, Colm Buckley */ #ifndef _LIBZFS_H #define _LIBZFS_H #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * Miscellaneous ZFS constants */ #define ZFS_MAXPROPLEN MAXPATHLEN #define ZPOOL_MAXPROPLEN MAXPATHLEN /* * libzfs errors */ typedef enum zfs_error { EZFS_SUCCESS = 0, /* no error -- success */ EZFS_NOMEM = 2000, /* out of memory */ EZFS_BADPROP, /* invalid property value */ EZFS_PROPREADONLY, /* cannot set readonly property */ EZFS_PROPTYPE, /* property does not apply to dataset type */ EZFS_PROPNONINHERIT, /* property is not inheritable */ EZFS_PROPSPACE, /* bad quota or reservation */ EZFS_BADTYPE, /* dataset is not of appropriate type */ EZFS_BUSY, /* pool or dataset is busy */ EZFS_EXISTS, /* pool or dataset already exists */ EZFS_NOENT, /* no such pool or dataset */ EZFS_BADSTREAM, /* bad backup stream */ EZFS_DSREADONLY, /* dataset is readonly */ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ EZFS_INVALIDNAME, /* invalid dataset name */ EZFS_BADRESTORE, /* unable to restore to destination */ EZFS_BADBACKUP, /* backup failed */ EZFS_BADTARGET, /* bad attach/detach/replace target */ EZFS_NODEVICE, /* no such device in pool */ EZFS_BADDEV, /* invalid device to add */ EZFS_NOREPLICAS, /* no valid replicas */ EZFS_RESILVERING, /* 
resilvering (healing reconstruction) */ EZFS_BADVERSION, /* unsupported version */ EZFS_POOLUNAVAIL, /* pool is currently unavailable */ EZFS_DEVOVERFLOW, /* too many devices in one vdev */ EZFS_BADPATH, /* must be an absolute path */ EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ EZFS_ZONED, /* used improperly in local zone */ EZFS_MOUNTFAILED, /* failed to mount dataset */ EZFS_UMOUNTFAILED, /* failed to unmount dataset */ EZFS_UNSHARENFSFAILED, /* failed to unshare over nfs */ EZFS_SHARENFSFAILED, /* failed to share over nfs */ EZFS_PERM, /* permission denied */ EZFS_NOSPC, /* out of space */ EZFS_FAULT, /* bad address */ EZFS_IO, /* I/O error */ EZFS_INTR, /* signal received */ EZFS_ISSPARE, /* device is a hot spare */ EZFS_INVALCONFIG, /* invalid vdev configuration */ EZFS_RECURSIVE, /* recursive dependency */ EZFS_NOHISTORY, /* no history object */ EZFS_POOLPROPS, /* couldn't retrieve pool props */ EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ EZFS_NAMETOOLONG, /* dataset name is too long */ EZFS_OPENFAILED, /* open of device failed */ EZFS_NOCAP, /* couldn't get capacity */ EZFS_LABELFAILED, /* write of label failed */ EZFS_BADWHO, /* invalid permission who */ EZFS_BADPERM, /* invalid permission */ EZFS_BADPERMSET, /* invalid permission set name */ EZFS_NODELEGATION, /* delegated administration is disabled */ EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ EZFS_SHARESMBFAILED, /* failed to share over smb */ EZFS_BADCACHE, /* bad cache file */ EZFS_ISL2CACHE, /* device is for the level 2 ARC */ EZFS_VDEVNOTSUP, /* unsupported vdev type */ EZFS_NOTSUP, /* ops not supported on this dataset */ EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ EZFS_REFTAG_RELE, /* snapshot release: tag not found */ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ EZFS_TAGTOOLONG, /* snapshot 
hold/rele: tag too long */ EZFS_PIPEFAILED, /* pipe create failed */ EZFS_THREADCREATEFAILED, /* thread create failed */ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ EZFS_SCRUBBING, /* currently scrubbing */ EZFS_NO_SCRUB, /* no active scrub */ EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_SCRUB_PAUSED, /* scrub currently paused */ EZFS_ACTIVE_POOL, /* pool is imported on a different system */ EZFS_CRYPTOFAILED, /* failed to setup encryption */ EZFS_NO_PENDING, /* cannot cancel, no operation is pending */ EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */ EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */ EZFS_NO_CHECKPOINT, /* pool has no checkpoint */ EZFS_DEVRM_IN_PROGRESS, /* a device is currently being removed */ EZFS_VDEV_TOO_BIG, /* a device is too big to be used */ EZFS_IOC_NOTSUPPORTED, /* operation not supported by zfs module */ EZFS_TOOMANY, /* argument list too long */ EZFS_INITIALIZING, /* currently initializing */ EZFS_NO_INITIALIZE, /* no active initialize */ EZFS_WRONG_PARENT, /* invalid parent dataset (e.g ZVOL) */ EZFS_TRIMMING, /* currently trimming */ EZFS_NO_TRIM, /* no active trim */ EZFS_TRIM_NOTSUP, /* device does not support trim */ EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */ EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */ EZFS_REBUILDING, /* resilvering (sequential reconstrution) */ EZFS_CKSUM, /* insufficient replicas */ EZFS_UNKNOWN } zfs_error_t; /* * The following data structures are all part * of the zfs_allow_t data structure which is * used for printing 'allow' permissions. * It is a linked list of zfs_allow_t's which * then contain avl tree's for user/group/sets/... * and each one of the entries in those trees have * avl tree's for the permissions they belong to and * whether they are local,descendent or local+descendent * permissions. 
The AVL trees are used primarily for * sorting purposes, but also so that we can quickly find * a given user and or permission. */ typedef struct zfs_perm_node { avl_node_t z_node; char z_pname[MAXPATHLEN]; } zfs_perm_node_t; typedef struct zfs_allow_node { avl_node_t z_node; char z_key[MAXPATHLEN]; /* name, such as joe */ avl_tree_t z_localdescend; /* local+descendent perms */ avl_tree_t z_local; /* local permissions */ avl_tree_t z_descend; /* descendent permissions */ } zfs_allow_node_t; typedef struct zfs_allow { struct zfs_allow *z_next; char z_setpoint[MAXPATHLEN]; avl_tree_t z_sets; avl_tree_t z_crperms; avl_tree_t z_user; avl_tree_t z_group; avl_tree_t z_everyone; } zfs_allow_t; /* * Basic handle types */ typedef struct zfs_handle zfs_handle_t; typedef struct zpool_handle zpool_handle_t; typedef struct libzfs_handle libzfs_handle_t; extern int zpool_wait(zpool_handle_t *, zpool_wait_activity_t); extern int zpool_wait_status(zpool_handle_t *, zpool_wait_activity_t, boolean_t *, boolean_t *); /* * Library initialization */ extern libzfs_handle_t *libzfs_init(void); extern void libzfs_fini(libzfs_handle_t *); extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); extern void zfs_save_arguments(int argc, char **, char *, int); extern int zpool_log_history(libzfs_handle_t *, const char *); extern int libzfs_errno(libzfs_handle_t *); extern const char *libzfs_error_init(int); extern const char *libzfs_error_action(libzfs_handle_t *); extern const char *libzfs_error_description(libzfs_handle_t *); extern int zfs_standard_error(libzfs_handle_t *, int, const char *); extern void libzfs_mnttab_init(libzfs_handle_t *); extern void libzfs_mnttab_fini(libzfs_handle_t *); extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, struct mnttab *); extern void 
libzfs_mnttab_add(libzfs_handle_t *, const char *, const char *, const char *); extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); /* * Basic handle functions */ extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); extern void zpool_close(zpool_handle_t *); extern const char *zpool_get_name(zpool_handle_t *); extern int zpool_get_state(zpool_handle_t *); extern const char *zpool_state_to_name(vdev_state_t, vdev_aux_t); extern const char *zpool_pool_state_to_name(pool_state_t); extern void zpool_free_handles(libzfs_handle_t *); /* * Iterate over all active pools in the system. */ typedef int (*zpool_iter_f)(zpool_handle_t *, void *); extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); extern boolean_t zpool_skip_pool(const char *); /* * Functions to create and destroy pools */ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, nvlist_t *, nvlist_t *); extern int zpool_destroy(zpool_handle_t *, const char *); extern int zpool_add(zpool_handle_t *, nvlist_t *); typedef struct splitflags { /* do not split, but return the config that would be split off */ unsigned int dryrun : 1; /* after splitting, import the pool */ unsigned int import : 1; int name_flags; } splitflags_t; typedef struct trimflags { /* requested vdevs are for the entire pool */ boolean_t fullpool; /* request a secure trim, requires support from device */ boolean_t secure; /* after starting trim, block until trim completes */ boolean_t wait; /* trim at the requested rate in bytes/second */ uint64_t rate; } trimflags_t; /* * Functions to manipulate pool and vdev state */ extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); extern int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); extern int zpool_trim(zpool_handle_t *, 
pool_trim_func_t, nvlist_t *, trimflags_t *); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_reguid(zpool_handle_t *); extern int zpool_reopen_one(zpool_handle_t *, void *); extern int zpool_sync_one(zpool_handle_t *, void *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *, nvlist_t *, int, boolean_t); extern int zpool_vdev_detach(zpool_handle_t *, const char *); extern int zpool_vdev_remove(zpool_handle_t *, const char *); extern int zpool_vdev_remove_cancel(zpool_handle_t *); extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *); extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, splitflags_t); _LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *); extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); +extern int zpool_vdev_set_removed_state(zpool_handle_t *, uint64_t, + vdev_aux_t); extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *); extern int zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv, const char *prepare_str, char **lines[], int *lines_cnt); extern int zpool_prepare_and_label_disk(libzfs_handle_t *hdl, zpool_handle_t *, const char *, nvlist_t *vdev_nv, const char *prepare_str, char **lines[], int *lines_cnt); extern char ** zpool_vdev_script_alloc_env(const char *pool_name, const char *vdev_path, const char *vdev_upath, const char *vdev_enc_sysfs_path, const char 
*opt_key, const char *opt_val); extern void zpool_vdev_script_free_env(char **env); extern uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path); const char *zpool_get_state_str(zpool_handle_t *); /* * Functions to manage pool properties */ extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, size_t proplen, zprop_source_t *, boolean_t literal); extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, zprop_source_t *); extern int zpool_props_refresh(zpool_handle_t *); extern const char *zpool_prop_to_name(zpool_prop_t); extern const char *zpool_prop_values(zpool_prop_t); /* * Pool health statistics. */ typedef enum { /* * The following correspond to faults as defined in the (fault.fs.zfs.*) * event namespace. Each is associated with a corresponding message ID. * This must be kept in sync with the zfs_msgid_table in * lib/libzfs/libzfs_status.c. */ ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ ZPOOL_STATUS_HOSTID_ACTIVE, /* currently active on another system */ ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, 
failmode not 'panic' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ ZPOOL_STATUS_ERRATA, /* informational errata available */ /* * If the pool has unsupported features but can still be opened in * read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the * pool has unsupported features but cannot be opened at all, its * status is ZPOOL_STATUS_UNSUP_FEAT_READ. */ ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */ ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */ /* * These faults have no corresponding message ID. At the time we are * checking the status, the original reason for the FMA fault (I/O or * checksum errors) has been lost. */ ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ /* * The following are not faults per se, but still an error possibly * requiring administrative attention. There is no corresponding * message ID. */ ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */ ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */ ZPOOL_STATUS_RESILVERING, /* device being resilvered */ ZPOOL_STATUS_OFFLINE_DEV, /* device offline */ ZPOOL_STATUS_REMOVED_DEV, /* removed device */ ZPOOL_STATUS_REBUILDING, /* device being rebuilt */ ZPOOL_STATUS_REBUILD_SCRUB, /* recommend scrubbing the pool */ ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */ ZPOOL_STATUS_COMPATIBILITY_ERR, /* bad 'compatibility' property */ ZPOOL_STATUS_INCOMPATIBLE_FEAT, /* feature set outside compatibility */ /* * Finally, the following indicates a healthy pool. */ ZPOOL_STATUS_OK } zpool_status_t; extern zpool_status_t zpool_get_status(zpool_handle_t *, char **, zpool_errata_t *); extern zpool_status_t zpool_import_status(nvlist_t *, char **, zpool_errata_t *); /* * Statistics and configuration functions. 
*/ extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); extern nvlist_t *zpool_get_features(zpool_handle_t *); extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); /* * Import and export functions */ extern int zpool_export(zpool_handle_t *, boolean_t, const char *); extern int zpool_export_force(zpool_handle_t *, const char *); extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char *altroot); extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, nvlist_t *, int); extern void zpool_print_unsup_feat(nvlist_t *config); /* * Miscellaneous pool functions */ struct zfs_cmd; extern const char *zfs_history_event_names[]; typedef enum { VDEV_NAME_PATH = 1 << 0, VDEV_NAME_GUID = 1 << 1, VDEV_NAME_FOLLOW_LINKS = 1 << 2, VDEV_NAME_TYPE_ID = 1 << 3, } vdev_name_t; extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, int name_flags); extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **, uint64_t *, boolean_t *); extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, unsigned, int); extern int zpool_events_clear(libzfs_handle_t *, int *); extern int zpool_events_seek(libzfs_handle_t *, uint64_t, int); extern void zpool_obj_to_path_ds(zpool_handle_t *, uint64_t, uint64_t, char *, size_t); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, nvlist_t *); extern int zpool_checkpoint(zpool_handle_t *); extern int zpool_discard_checkpoint(zpool_handle_t *); extern boolean_t zpool_is_draid_spare(const char *); /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. 
*/ extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); extern void zfs_close(zfs_handle_t *); extern zfs_type_t zfs_get_type(const zfs_handle_t *); extern const char *zfs_get_name(const zfs_handle_t *); extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); extern const char *zfs_get_pool_name(const zfs_handle_t *); /* * Property management functions. Some functions are shared with the kernel, * and are found in sys/fs/zfs.h. */ /* * zfs dataset property management */ extern const char *zfs_prop_default_string(zfs_prop_t); extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); extern int zfs_prop_set_list(zfs_handle_t *, nvlist_t *); extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zprop_source_t *, char *, size_t, boolean_t); extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, boolean_t); extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zprop_source_t *, char *, size_t); extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, char *buf, size_t len); extern uint64_t getprop_uint64(zfs_handle_t *, 
zfs_prop_t, char **); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); extern int zfs_prop_is_string(zfs_prop_t prop); extern nvlist_t *zfs_get_all_props(zfs_handle_t *); extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t, boolean_t *, boolean_t *); /* * zfs encryption management */ extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *); extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *, boolean_t stdin_available, uint8_t **, uint_t *); extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *, nvlist_t *); extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *); extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *); extern int zfs_crypto_unload_key(zfs_handle_t *); extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t); typedef struct zprop_list { int pl_prop; char *pl_user_prop; struct zprop_list *pl_next; boolean_t pl_all; size_t pl_width; size_t pl_recvd_width; boolean_t pl_fixed; } zprop_list_t; extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t, boolean_t); extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); #define ZFS_MOUNTPOINT_NONE "none" #define ZFS_MOUNTPOINT_LEGACY "legacy" #define ZFS_FEATURE_DISABLED "disabled" #define ZFS_FEATURE_ENABLED "enabled" #define ZFS_FEATURE_ACTIVE "active" #define ZFS_UNSUPPORTED_INACTIVE "inactive" #define ZFS_UNSUPPORTED_READONLY "readonly" /* * zpool property management */ extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **, boolean_t); extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *, size_t); extern const char 
*zpool_prop_default_string(zpool_prop_t); extern uint64_t zpool_prop_default_numeric(zpool_prop_t); extern const char *zpool_prop_column_name(zpool_prop_t); extern boolean_t zpool_prop_align_right(zpool_prop_t); /* * Functions shared by zfs and zpool property management. */ extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, zfs_type_t type); extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, zfs_type_t); extern void zprop_free_list(zprop_list_t *); #define ZFS_GET_NCOLS 5 typedef enum { GET_COL_NONE, GET_COL_NAME, GET_COL_PROPERTY, GET_COL_VALUE, GET_COL_RECVD, GET_COL_SOURCE } zfs_get_column_t; /* * Functions for printing zfs or zpool properties */ typedef struct zprop_get_cbdata { int cb_sources; zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; int cb_colwidths[ZFS_GET_NCOLS + 1]; boolean_t cb_scripted; boolean_t cb_literal; boolean_t cb_first; zprop_list_t *cb_proplist; zfs_type_t cb_type; } zprop_get_cbdata_t; void zprop_print_one_property(const char *, zprop_get_cbdata_t *, const char *, const char *, zprop_source_t, const char *, const char *); /* * Iterator functions. 
*/ typedef int (*zfs_iter_f)(zfs_handle_t *, void *); extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *, uint64_t, uint64_t); extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *, uint64_t, uint64_t); extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); extern int zfs_iter_bookmarks(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_mounted(zfs_handle_t *, zfs_iter_f, void *); typedef struct get_all_cb { zfs_handle_t **cb_handles; size_t cb_alloc; size_t cb_used; } get_all_cb_t; void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t, zfs_iter_f, void *, boolean_t); void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); /* * Functions to create and destroy datasets. 
*/ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, nvlist_t *); extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); typedef struct renameflags { /* recursive rename */ unsigned int recursive : 1; /* don't unmount file systems */ unsigned int nounmount : 1; /* force unmount file systems */ unsigned int forceunmount : 1; } renameflags_t; extern int zfs_rename(zfs_handle_t *, const char *, renameflags_t); /* Options for the zfs_send*() functions declared below, which take a sendflags_t pointer. */ typedef struct sendflags { /* Amount of extra information to print. */ int verbosity; /* recursive send (ie, -R) */ boolean_t replicate; /* for recursive send, skip sending missing snapshots */ boolean_t skipmissing; /* for incrementals, do all intermediate snapshots */ boolean_t doall; /* if dataset is a clone, do incremental from its origin */ boolean_t fromorigin; /* field no longer used, maintained for backwards compatibility */ boolean_t pad; /* send properties (ie, -p) */ boolean_t props; /* do not send (no-op, ie. -n) */ boolean_t dryrun; /* parsable verbose output (ie. -P) */ boolean_t parsable; /* show progress (ie. -v) */ boolean_t progress; /* show progress as process title (ie. -V) */ boolean_t progressastitle; /* large blocks (>128K) are permitted */ boolean_t largeblock; /* WRITE_EMBEDDED records of type DATA are permitted */ boolean_t embed_data; /* compressed WRITE records are permitted */ boolean_t compress; /* raw encrypted records are permitted */ boolean_t raw; /* only send received properties (ie. 
-b) */ boolean_t backup; /* include snapshot holds in send stream */ boolean_t holds; /* stream represents a partially received dataset */ boolean_t saved; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t *, const char *); extern int zfs_send_progress(zfs_handle_t *, int, uint64_t *, uint64_t *); extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd, const char *); extern int zfs_send_saved(zfs_handle_t *, sendflags_t *, int, const char *); extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, int); extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(zpool_handle_t *, uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, zfs_userspace_cb_t, void *); extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); /* Options passed by pointer to zfs_receive(). */ typedef struct recvflags { /* print informational messages (ie, -v was specified) */ boolean_t verbose; /* the destination is a prefix, not the exact fs (ie, -d) */ boolean_t isprefix; /* * Only the tail of the sent snapshot path is appended to the * destination to determine the received snapshot name (ie, -e). 
*/ boolean_t istail; /* do not actually do the recv, just check if it would work (ie, -n) */ boolean_t dryrun; /* rollback/destroy filesystems as necessary (eg, -F) */ boolean_t force; /* set "canmount=off" on all modified filesystems */ boolean_t canmountoff; /* * Mark the file systems as "resumable" and do not destroy them if the * receive is interrupted */ boolean_t resumable; /* byteswap flag is used internally; callers need not specify */ boolean_t byteswap; /* do not mount file systems as they are extracted (private) */ boolean_t nomount; /* Was holds flag set in the compound header? */ boolean_t holds; /* skip receive of snapshot holds */ boolean_t skipholds; /* mount the filesystem unless nomount is specified */ boolean_t domount; /* force unmount while recv snapshot (private) */ boolean_t forceunmount; } recvflags_t; extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, recvflags_t *, int, avl_tree_t *); /* Bit flags, each a distinct power of two so they can be OR-ed together; presumably meant for the int flags argument of zfs_show_diffs() -- confirm. */ typedef enum diff_flags { ZFS_DIFF_PARSEABLE = 1 << 0, ZFS_DIFF_TIMESTAMP = 1 << 1, ZFS_DIFF_CLASSIFY = 1 << 2, ZFS_DIFF_NO_MANGLE = 1 << 3 } diff_flags_t; extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, int); /* * Miscellaneous functions. */ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_parent_name(zfs_handle_t *, char *, size_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); extern boolean_t zfs_bookmark_exists(const char *path); /* * Mount support functions. 
*/ extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); extern int zfs_mount(zfs_handle_t *, const char *, int); extern int zfs_mount_at(zfs_handle_t *, const char *, int, const char *); extern int zfs_unmount(zfs_handle_t *, const char *, int); extern int zfs_unmountall(zfs_handle_t *, int); #if defined(__linux__) extern int zfs_parse_mount_options(char *mntopts, unsigned long *mntflags, unsigned long *zfsflags, int sloppy, char *badopt, char *mtabopt); extern void zfs_adjust_mount_options(zfs_handle_t *zhp, const char *mntpoint, char *mntopts, char *mtabopt); #endif /* * Share support functions. */ extern boolean_t zfs_is_shared(zfs_handle_t *); extern int zfs_share(zfs_handle_t *); extern int zfs_unshare(zfs_handle_t *); /* * Protocol-specific share support functions. */ extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); extern int zfs_share_nfs(zfs_handle_t *); extern int zfs_share_smb(zfs_handle_t *); extern int zfs_shareall(zfs_handle_t *); extern int zfs_unshare_nfs(zfs_handle_t *, const char *); extern int zfs_unshare_smb(zfs_handle_t *, const char *); extern int zfs_unshareall_nfs(zfs_handle_t *); extern int zfs_unshareall_smb(zfs_handle_t *); extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); extern int zfs_unshareall_bytype(zfs_handle_t *, const char *, const char *); extern int zfs_unshareall(zfs_handle_t *); extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, void *, void *, int, zfs_share_op_t); extern void zfs_commit_nfs_shares(void); extern void zfs_commit_smb_shares(void); extern void zfs_commit_all_shares(void); extern void zfs_commit_shares(const char *); extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); /* * Utility functions to run an external process. 
*/ /* Distinct flag bits; presumably OR-ed into the int flags argument of libzfs_run_process() -- confirm. */ #define STDOUT_VERBOSE 0x01 #define STDERR_VERBOSE 0x02 #define NO_DEFAULT_PATH 0x04 /* Don't use $PATH to lookup the command */ int libzfs_run_process(const char *, char **, int); int libzfs_run_process_get_stdout(const char *, char *[], char *[], char **[], int *); int libzfs_run_process_get_stdout_nopath(const char *, char *[], char *[], char **[], int *); void libzfs_free_str_array(char **, int); int libzfs_envvar_is_set(char *); /* * Utility functions for zfs version */ extern void zfs_version_userland(char *, int); extern int zfs_version_kernel(char *, int); extern int zfs_version_print(void); /* * Given a device or file, determine if it is part of a pool. */ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, boolean_t *); /* * Label manipulation. */ extern int zpool_clear_label(int); extern int zpool_set_bootenv(zpool_handle_t *, const nvlist_t *); extern int zpool_get_bootenv(zpool_handle_t *, nvlist_t **); /* * Management interfaces for SMB ACL files */ int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); /* * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); /* * Parse a features file for -o compatibility */ typedef enum { ZPOOL_COMPATIBILITY_OK, ZPOOL_COMPATIBILITY_WARNTOKEN, ZPOOL_COMPATIBILITY_BADTOKEN, ZPOOL_COMPATIBILITY_BADFILE, ZPOOL_COMPATIBILITY_NOFILES } zpool_compat_status_t; /* zpool_load_compat() returns one of the zpool_compat_status_t values above. */ extern zpool_compat_status_t zpool_load_compat(const char *, boolean_t *, char *, size_t); #ifdef __FreeBSD__ /* * Attach/detach the given filesystem to/from the given jail. 
*/ extern int zfs_jail(zfs_handle_t *zhp, int jailid, int attach); /* * Set loader options for next boot. */ extern int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t, const char *); #endif /* __FreeBSD__ */ #ifdef __cplusplus } #endif #endif /* _LIBZFS_H */ diff --git a/include/libzutil.h b/include/libzutil.h index fa61d7818571..af0f74318729 100644 --- a/include/libzutil.h +++ b/include/libzutil.h @@ -1,196 +1,253 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018 by Delphix. All rights reserved. */ #ifndef _LIBZUTIL_H #define _LIBZUTIL_H #include #include #ifdef __cplusplus extern "C" { #endif /* * Default wait time for a device name to be created. */ #define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */ /* * Pool Config Operations * * These are specific to the library libzfs or libzpool instance. 
*/ typedef nvlist_t *refresh_config_func_t(void *, nvlist_t *); typedef int pool_active_func_t(void *, const char *, uint64_t, boolean_t *); /* Table holding one pointer to each of the two function types above; note that the typedef itself is const-qualified. */ typedef const struct pool_config_ops { refresh_config_func_t *pco_refresh_config; pool_active_func_t *pco_pool_active; } pool_config_ops_t; /* * An instance of pool_config_ops_t is expected in the caller's binary. */ extern const pool_config_ops_t libzfs_config_ops; extern const pool_config_ops_t libzpool_config_ops; /* Search parameters passed by pointer to zpool_search_import() and zpool_find_config(). */ typedef struct importargs { char **path; /* a list of paths to search */ int paths; /* number of paths to search */ const char *poolname; /* name of a pool to find */ uint64_t guid; /* guid of a pool to find */ const char *cachefile; /* cachefile to use for import */ boolean_t can_be_active; /* can the pool be active? */ boolean_t scan; /* prefer scanning to libblkid cache */ nvlist_t *policy; /* load policy (max txg, rewind, etc.) */ } importargs_t; extern nvlist_t *zpool_search_import(void *, importargs_t *, const pool_config_ops_t *); extern int zpool_find_config(void *, const char *, nvlist_t **, importargs_t *, const pool_config_ops_t *); extern const char * const * zpool_default_search_paths(size_t *count); extern int zpool_read_label(int, nvlist_t **, int *); extern int zpool_label_disk_wait(const char *, int); +extern int zpool_disk_wait(const char *); struct udev_device; extern int zfs_device_get_devid(struct udev_device *, char *, size_t); extern int zfs_device_get_physical(struct udev_device *, char *, size_t); extern void update_vdev_config_dev_strs(nvlist_t *); /* * Default device paths */ #define DISK_ROOT "/dev" #define UDISK_ROOT "/dev/disk" #define ZVOL_ROOT "/dev/zvol" extern int zfs_append_partition(char *path, size_t max_len); extern int zfs_resolve_shortname(const char *name, char *path, size_t pathlen); extern char *zfs_strip_partition(char *); extern char *zfs_strip_path(char *); extern int zfs_strcmp_pathname(const char *, const char *, int); extern boolean_t zfs_dev_is_dm(const char *); 
extern boolean_t zfs_dev_is_whole_disk(const char *); extern int zfs_dev_flush(int); extern char *zfs_get_underlying_path(const char *); extern char *zfs_get_enclosure_sysfs_path(const char *); extern boolean_t is_mpath_whole_disk(const char *); extern boolean_t zfs_isnumber(const char *); /* * Formats for iostat numbers. Examples: "12K", "30ms", "4B", "2321234", "-". * * ZFS_NICENUM_1024: Print kilo, mega, tera, peta, exa.. * ZFS_NICENUM_BYTES: Print single bytes ("13B"), kilo, mega, tera... * ZFS_NICENUM_TIME: Print nanosecs, microsecs, millisecs, seconds... * ZFS_NICENUM_RAW: Print the raw number without any formatting * ZFS_NICENUM_RAWTIME: Same as RAW, but print dashes ('-') for zero. */ enum zfs_nicenum_format { ZFS_NICENUM_1024 = 0, ZFS_NICENUM_BYTES = 1, ZFS_NICENUM_TIME = 2, ZFS_NICENUM_RAW = 3, ZFS_NICENUM_RAWTIME = 4 }; /* * Convert a number to a human-readable form. */ extern void zfs_nicebytes(uint64_t, char *, size_t); extern void zfs_nicenum(uint64_t, char *, size_t); extern void zfs_nicenum_format(uint64_t, char *, size_t, enum zfs_nicenum_format); extern void zfs_nicetime(uint64_t, char *, size_t); extern void zfs_niceraw(uint64_t, char *, size_t); /* Shorthand that expands to zfs_nicenum() with the same arguments. */ #define nicenum(num, buf, size) zfs_nicenum(num, buf, size) extern void zpool_dump_ddt(const ddt_stat_t *, const ddt_histogram_t *); extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); +extern void fsleep(float sec); +extern int zpool_getenv_int(const char *env, int default_val); struct zfs_cmd; int zfs_ioctl_fd(int fd, unsigned long request, struct zfs_cmd *zc); /* * List of colors to use */ #define ANSI_BLACK "\033[0;30m" #define ANSI_RED "\033[0;31m" #define ANSI_GREEN "\033[0;32m" #define ANSI_YELLOW "\033[0;33m" #define ANSI_BLUE "\033[0;34m" #define ANSI_BOLD_BLUE "\033[1;34m" /* light blue */ #define ANSI_MAGENTA "\033[0;35m" #define ANSI_CYAN "\033[0;36m" #define ANSI_GRAY "\033[0;37m" #define ANSI_RESET "\033[0m" #define ANSI_BOLD "\033[1m" int use_color(void); 
void color_start(const char *color); void color_end(void); int printf_color(const char *color, char *format, ...); #ifdef __linux__ extern char **environ; _LIBZUTIL_H void zfs_setproctitle_init(int argc, char *argv[], char *envp[]); _LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...); #else #define zfs_setproctitle(fmt, ...) setproctitle(fmt, ##__VA_ARGS__) #define zfs_setproctitle_init(x, y, z) ((void)0) #endif /* * These functions are used by the ZFS libraries and cmd/zpool code, but are * not exported in the ABI. */ typedef int (*pool_vdev_iter_f)(void *, nvlist_t *, void *); int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func, void *data); +int for_each_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, void *data); +int for_each_real_leaf_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, + void *data); +/* + * Often you'll want to iterate over all the vdevs in the pool, but don't want + * to use for_each_vdev() since it requires a callback function. + * + * Instead you can use FOR_EACH_VDEV(): + * + * zpool_handle_t *zhp // Assume this is initialized + * nvlist_t *nv + * ... + * FOR_EACH_VDEV(zhp, nv) { + * const char *path = NULL; + * nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path); + * printf("Looking at vdev %s\n", path); + * } + * + * Note: FOR_EACH_VDEV runs in O(n^2) time where n = number of vdevs. However, + * there's an upper limit of 256 vdevs per dRAID top-level vdev (TLD), 255 for + * raidz2 TLDs, and a real world limit of ~500 vdevs for mirrors, so this + * shouldn't really be an issue. + * + * Here are some micro-benchmarks of a complete FOR_EACH_VDEV loop on a RAID0 + * pool: + * + * 100 vdevs = 0.7ms + * 500 vdevs = 17ms + * 750 vdevs = 40ms + * 1000 vdevs = 82ms + * + * The '__nv += 0' at the end of the for() loop gets around a "comma or + * semicolon followed by non-blank" checkstyle error. 
Note: on most compilers + * the '__nv += 0' can just be replaced with 'NULL', but gcc on Centos 7 + * will give a 'warning: statement with no effect' error if you do that. + * + * NOTE: the macro expands to a braced block followed by a for statement, so + * it must not be used as the body of an unbraced if/else or loop; only the + * braced block would be governed by the condition. + */ +#define __FOR_EACH_VDEV(__zhp, __nv, __func) { \ + __nv = zpool_get_config(__zhp, NULL); \ + VERIFY0(nvlist_lookup_nvlist(__nv, ZPOOL_CONFIG_VDEV_TREE, &__nv)); \ + } \ + for (nvlist_t *__root_nv = __nv, *__state = (nvlist_t *)0; \ + for_each_vdev_cb(&__state, __root_nv, __func, &__nv) == 1; \ + __nv += 0) + +#define FOR_EACH_VDEV(__zhp, __nv) \ + __FOR_EACH_VDEV(__zhp, __nv, for_each_vdev_macro_helper_func) + +/* + * "real leaf" vdevs are leaf vdevs that are real devices (disks or files). + * This excludes leaf vdevs like draid spares. + */ +#define FOR_EACH_REAL_LEAF_VDEV(__zhp, __nv) \ + __FOR_EACH_VDEV(__zhp, __nv, for_each_real_leaf_vdev_macro_helper_func) + int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func, void *data); void update_vdevs_config_dev_sysfs_path(nvlist_t *config); _LIBZUTIL_H void update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, const char *key); #ifdef __cplusplus } #endif #endif /* _LIBZUTIL_H */ diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index fc6c6e8e2cf6..b3e8948cefcb 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -1,4989 +1,5028 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov * Copyright (c) 2018 Datto Inc. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2018, loli10K * Copyright (c) 2021, Colm Buckley */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "libzfs_impl.h" #include "zfs_comutil.h" #include "zfeature_common.h" static boolean_t zpool_vdev_is_interior(const char *name); typedef struct prop_flags { unsigned int create:1; /* Validate property on creation */ unsigned int import:1; /* Validate property on import */ } prop_flags_t; /* * ==================================================================== * zpool property functions * ==================================================================== */ /* * Fetch the pool's properties with ZFS_IOC_POOL_GET_PROPS and store the * resulting nvlist in zhp->zpool_props. On ENOMEM the destination buffer is * expanded and the ioctl retried; any other errno aborts. Returns 0 on * success and -1 on failure. */ static int zpool_get_all_props(zpool_handle_t *zhp) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) return (-1); while (zfs_ioctl(hdl, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) { if (errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { zcmd_free_nvlists(&zc); return (-1); } } else { zcmd_free_nvlists(&zc); return (-1); } } if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) { zcmd_free_nvlists(&zc); return (-1); } zcmd_free_nvlists(&zc); return (0); } /* * Re-fetch the pool's properties. The previous nvlist is freed only after * the new fetch succeeds. Returns 0 on success and -1 on failure. */ int zpool_props_refresh(zpool_handle_t *zhp) { nvlist_t 
*old_props; old_props = zhp->zpool_props; if (zpool_get_all_props(zhp) != 0) return (-1); nvlist_free(old_props); return (0); } /* * Return the string value of 'prop' from zhp->zpool_props. When the property * is not present, the property's default string is used, or "-" if there is * no default. If 'src' is non-NULL the property source is stored through it. * This function does not itself load zhp->zpool_props. */ static const char * zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src) { nvlist_t *nv, *nvl; uint64_t ival; char *value; zprop_source_t source; nvl = zhp->zpool_props; if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) { verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0); source = ival; verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); } else { source = ZPROP_SRC_DEFAULT; if ((value = (char *)zpool_prop_default_string(prop)) == NULL) value = "-"; } if (src) *src = source; return (value); } /* * Return the numeric value of 'prop', loading the pool properties first if * they have not been fetched yet. If 'src' is non-NULL the property source * is stored through it, except on the early-return paths taken when the * properties cannot be loaded, which leave *src untouched. */ uint64_t zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src) { nvlist_t *nv, *nvl; uint64_t value; zprop_source_t source; if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) { /* * zpool_get_all_props() has most likely failed because * the pool is faulted, but if all we need is the top level * vdev's guid then get it from the zhp config nvlist. */ if ((prop == ZPOOL_PROP_GUID) && (nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) && (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value) == 0)) { return (value); } return (zpool_prop_default_numeric(prop)); } nvl = zhp->zpool_props; if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) { /* 'value' briefly holds the source before being overwritten with the property value. */ verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0); source = value; verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0); } else { source = ZPROP_SRC_DEFAULT; value = zpool_prop_default_numeric(prop); } if (src) *src = source; return (value); } /* * Map VDEV STATE to printed strings. 
*/
const char *
zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
{
	/* CLOSED and OFFLINE share one label. */
	if (state == VDEV_STATE_CLOSED || state == VDEV_STATE_OFFLINE)
		return (gettext("OFFLINE"));

	if (state == VDEV_STATE_REMOVED)
		return (gettext("REMOVED"));

	if (state == VDEV_STATE_CANT_OPEN) {
		/* The label for an unopenable vdev depends on the aux code. */
		switch (aux) {
		case VDEV_AUX_CORRUPT_DATA:
		case VDEV_AUX_BAD_LOG:
			return (gettext("FAULTED"));
		case VDEV_AUX_SPLIT_POOL:
			return (gettext("SPLIT"));
		default:
			return (gettext("UNAVAIL"));
		}
	}

	if (state == VDEV_STATE_FAULTED)
		return (gettext("FAULTED"));

	if (state == VDEV_STATE_DEGRADED)
		return (gettext("DEGRADED"));

	if (state == VDEV_STATE_HEALTHY)
		return (gettext("ONLINE"));

	/* Any state not listed above. */
	return (gettext("UNKNOWN"));
}

/*
 * Translate a pool_state_t into its printable label; values without a
 * dedicated label yield "UNKNOWN".
 */
const char *
zpool_pool_state_to_name(pool_state_t state)
{
	if (state == POOL_STATE_ACTIVE)
		return (gettext("ACTIVE"));
	if (state == POOL_STATE_EXPORTED)
		return (gettext("EXPORTED"));
	if (state == POOL_STATE_DESTROYED)
		return (gettext("DESTROYED"));
	if (state == POOL_STATE_SPARE)
		return (gettext("SPARE"));
	if (state == POOL_STATE_L2CACHE)
		return (gettext("L2CACHE"));
	if (state == POOL_STATE_UNINITIALIZED)
		return (gettext("UNINITIALIZED"));
	if (state == POOL_STATE_UNAVAIL)
		return (gettext("UNAVAIL"));
	if (state == POOL_STATE_POTENTIALLY_ACTIVE)
		return (gettext("POTENTIALLY_ACTIVE"));

	return (gettext("UNKNOWN"));
}

/*
 * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED",
 * "SUSPENDED", etc).
*/ const char * zpool_get_state_str(zpool_handle_t *zhp) { zpool_errata_t errata; zpool_status_t status; nvlist_t *nvroot; vdev_stat_t *vs; uint_t vsc; const char *str; status = zpool_get_status(zhp, NULL, &errata); /* Precedence: an unavailable pool is FAULTED, a pool with an I/O failure status is SUSPENDED, otherwise the root vdev's state decides. */ if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { str = gettext("FAULTED"); } else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT || status == ZPOOL_STATUS_IO_FAILURE_MMP) { str = gettext("SUSPENDED"); } else { verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); str = zpool_state_to_name(vs->vs_state, vs->vs_aux); } return (str); } /* * Get a zpool property value for 'prop' and return the value in * a pre-allocated buffer. * * 'buf'/'len' describe the output buffer, 'srctype' (may be NULL) receives * the property source, and 'literal' selects raw numbers instead of the * human-readable forms. Returns 0 on success and -1 if the properties cannot * be loaded or an index value has no string form. */ int zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, zprop_source_t *srctype, boolean_t literal) { uint64_t intval; const char *strval; zprop_source_t src = ZPROP_SRC_NONE; /* Unavailable pool: only a handful of properties can be answered; everything else is reported as "-". */ if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { switch (prop) { case ZPOOL_PROP_NAME: (void) strlcpy(buf, zpool_get_name(zhp), len); break; case ZPOOL_PROP_HEALTH: (void) strlcpy(buf, zpool_get_state_str(zhp), len); break; case ZPOOL_PROP_GUID: intval = zpool_get_prop_int(zhp, prop, &src); (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); break; case ZPOOL_PROP_ALTROOT: case ZPOOL_PROP_CACHEFILE: case ZPOOL_PROP_COMMENT: case ZPOOL_PROP_COMPATIBILITY: /* These are reported only if the properties are already loaded or can be loaded now; otherwise fall through to "-". */ if (zhp->zpool_props != NULL || zpool_get_all_props(zhp) == 0) { (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src), len); break; } fallthrough; default: (void) strlcpy(buf, "-", len); break; } if (srctype != NULL) *srctype = src; return (0); } /* A failed property load is fatal for every property except the name. */ if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) && prop != ZPOOL_PROP_NAME) return (-1); switch (zpool_prop_get_type(prop)) { case PROP_TYPE_STRING: (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src), len); break; case PROP_TYPE_NUMBER: intval = zpool_get_prop_int(zhp, prop, 
&src); switch (prop) { case ZPOOL_PROP_SIZE: case ZPOOL_PROP_ALLOCATED: case ZPOOL_PROP_FREE: case ZPOOL_PROP_FREEING: case ZPOOL_PROP_LEAKED: case ZPOOL_PROP_ASHIFT: if (literal) (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); else (void) zfs_nicenum(intval, buf, len); break; case ZPOOL_PROP_EXPANDSZ: case ZPOOL_PROP_CHECKPOINT: /* Zero is printed as "-" for these two properties. */ if (intval == 0) { (void) strlcpy(buf, "-", len); } else if (literal) { (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); } else { (void) zfs_nicebytes(intval, buf, len); } break; case ZPOOL_PROP_CAPACITY: if (literal) { (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); } else { (void) snprintf(buf, len, "%llu%%", (u_longlong_t)intval); } break; case ZPOOL_PROP_FRAGMENTATION: /* UINT64_MAX is printed as "-". */ if (intval == UINT64_MAX) { (void) strlcpy(buf, "-", len); } else if (literal) { (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); } else { (void) snprintf(buf, len, "%llu%%", (u_longlong_t)intval); } break; case ZPOOL_PROP_DEDUPRATIO: /* The value is stored in hundredths; print it as N.NN, with an 'x' suffix unless literal. */ if (literal) (void) snprintf(buf, len, "%llu.%02llu", (u_longlong_t)(intval / 100), (u_longlong_t)(intval % 100)); else (void) snprintf(buf, len, "%llu.%02llux", (u_longlong_t)(intval / 100), (u_longlong_t)(intval % 100)); break; case ZPOOL_PROP_HEALTH: (void) strlcpy(buf, zpool_get_state_str(zhp), len); break; case ZPOOL_PROP_VERSION: /* Versions at or beyond SPA_VERSION_FEATURES are printed as "-" rather than a number. */ if (intval >= SPA_VERSION_FEATURES) { (void) snprintf(buf, len, "-"); break; } fallthrough; default: (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); } break; case PROP_TYPE_INDEX: intval = zpool_get_prop_int(zhp, prop, &src); if (zpool_prop_index_to_string(prop, intval, &strval) != 0) return (-1); (void) strlcpy(buf, strval, len); break; default: abort(); } if (srctype) *srctype = src; return (0); } /* * Check if the bootfs name has the same pool name as it is set to. * Assuming bootfs is a valid dataset name. 
*/ static boolean_t bootfs_name_valid(const char *pool, const char *bootfs) { int len = strlen(pool); if (bootfs[0] == '\0') return (B_TRUE); if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT)) return (B_FALSE); if (strncmp(pool, bootfs, len) == 0 && (bootfs[len] == '/' || bootfs[len] == '\0')) return (B_TRUE); return (B_FALSE); } /* * Given an nvlist of zpool properties to be set, validate that they are * correct, and parse any numeric properties (index, boolean, etc) if they are * specified as strings. */ static nvlist_t * zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf) { nvpair_t *elem; nvlist_t *retprops; zpool_prop_t prop; char *strval; uint64_t intval; char *slash, *check; struct stat64 statbuf; zpool_handle_t *zhp; char report[1024]; if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { const char *propname = nvpair_name(elem); prop = zpool_name_to_prop(propname); if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) { int err; char *fname = strchr(propname, '@') + 1; err = zfeature_lookup_name(fname, NULL); if (err != 0) { ASSERT3U(err, ==, ENOENT); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "feature '%s' unsupported by kernel"), fname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (nvpair_type(elem) != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } (void) nvpair_value_string(elem, &strval); if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 && strcmp(strval, ZFS_FEATURE_DISABLED) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set to " "'enabled' or 'disabled'"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (!flags.create && strcmp(strval, ZFS_FEATURE_DISABLED) == 
0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set to " "'disabled' at creation time"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (nvlist_add_uint64(retprops, propname, 0) != 0) { (void) no_memory(hdl); goto error; } continue; } /* * Make sure this property is valid and applies to this type. */ if (prop == ZPOOL_PROP_INVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (zpool_prop_readonly(prop)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " "is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (!flags.create && zpool_prop_setonce(prop)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set at " "creation time"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops, &strval, &intval, errbuf) != 0) goto error; /* * Perform additional checking for specific properties. 
*/ switch (prop) { case ZPOOL_PROP_VERSION: if (intval < version || !SPA_VERSION_IS_SUPPORTED(intval)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' number %llu is invalid."), propname, (unsigned long long)intval); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); goto error; } break; case ZPOOL_PROP_ASHIFT: if (intval != 0 && (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' number %llu is invalid, " "only values between %" PRId32 " and %" PRId32 " are allowed."), propname, (unsigned long long)intval, ASHIFT_MIN, ASHIFT_MAX); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZPOOL_PROP_BOOTFS: if (flags.create || flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' cannot be set at creation " "or import time"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (version < SPA_VERSION_BOOTFS) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to support " "'%s' property"), propname); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); goto error; } /* * bootfs property value has to be a dataset name and * the dataset has to be in the same pool as it sets to. 
*/ if (!bootfs_name_valid(poolname, strval)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " "is an invalid name"), strval); (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); goto error; } if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "could not open pool '%s'"), poolname); (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); goto error; } zpool_close(zhp); break; case ZPOOL_PROP_ALTROOT: if (!flags.create && !flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set during pool " "creation or import"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (strval[0] != '/') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), strval); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); goto error; } break; case ZPOOL_PROP_CACHEFILE: if (strval[0] == '\0') break; if (strcmp(strval, "none") == 0) break; if (strval[0] != '/') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' must be empty, an " "absolute path, or 'none'"), propname); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); goto error; } slash = strrchr(strval, '/'); if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || strcmp(slash, "/..") == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is not a valid file"), strval); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); goto error; } *slash = '\0'; if (strval[0] != '\0' && (stat64(strval, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is not a valid directory"), strval); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); goto error; } *slash = '/'; break; case ZPOOL_PROP_COMPATIBILITY: switch (zpool_load_compat(strval, NULL, report, 1024)) { case ZPOOL_COMPATIBILITY_OK: case ZPOOL_COMPATIBILITY_WARNTOKEN: break; case ZPOOL_COMPATIBILITY_BADFILE: case ZPOOL_COMPATIBILITY_BADTOKEN: case ZPOOL_COMPATIBILITY_NOFILES: zfs_error_aux(hdl, "%s", report); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } 
break; case ZPOOL_PROP_COMMENT: for (check = strval; *check != '\0'; check++) { if (!isprint(*check)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "comment may only have printable " "characters")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } if (strlen(strval) > ZPROP_MAX_COMMENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "comment must not exceed %d characters"), ZPROP_MAX_COMMENT); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZPOOL_PROP_READONLY: if (!flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set at " "import time"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZPOOL_PROP_MULTIHOST: if (get_system_hostid() == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "requires a non-zero system hostid")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZPOOL_PROP_DEDUPDITTO: printf("Note: property '%s' no longer has " "any effect\n", propname); break; default: break; } } return (retprops); error: nvlist_free(retprops); return (NULL); } /* * Set zpool property : propname=propval. */ int zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval) { zfs_cmd_t zc = {"\0"}; int ret = -1; char errbuf[1024]; nvlist_t *nvl = NULL; nvlist_t *realprops; uint64_t version; prop_flags_t flags = { 0 }; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), zhp->zpool_name); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) return (no_memory(zhp->zpool_hdl)); if (nvlist_add_string(nvl, propname, propval) != 0) { nvlist_free(nvl); return (no_memory(zhp->zpool_hdl)); } version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if ((realprops = zpool_valid_proplist(zhp->zpool_hdl, zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) { nvlist_free(nvl); return (-1); } nvlist_free(nvl); nvl = realprops; /* * Execute the corresponding ioctl() to set this property. 
*/ (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) { nvlist_free(nvl); return (-1); } ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc); zcmd_free_nvlists(&zc); nvlist_free(nvl); if (ret) (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf); else (void) zpool_props_refresh(zhp); return (ret); } int zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp, boolean_t literal) { libzfs_handle_t *hdl = zhp->zpool_hdl; zprop_list_t *entry; char buf[ZFS_MAXPROPLEN]; nvlist_t *features = NULL; nvpair_t *nvp; zprop_list_t **last; boolean_t firstexpand = (NULL == *plp); int i; if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0) return (-1); last = plp; while (*last != NULL) last = &(*last)->pl_next; if ((*plp)->pl_all) features = zpool_get_features(zhp); if ((*plp)->pl_all && firstexpand) { for (i = 0; i < SPA_FEATURES; i++) { zprop_list_t *entry = zfs_alloc(hdl, sizeof (zprop_list_t)); entry->pl_prop = ZPROP_INVAL; entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s", spa_feature_table[i].fi_uname); entry->pl_width = strlen(entry->pl_user_prop); entry->pl_all = B_TRUE; *last = entry; last = &entry->pl_next; } } /* add any unsupported features */ for (nvp = nvlist_next_nvpair(features, NULL); nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) { char *propname; boolean_t found; zprop_list_t *entry; if (zfeature_is_supported(nvpair_name(nvp))) continue; propname = zfs_asprintf(hdl, "unsupported@%s", nvpair_name(nvp)); /* * Before adding the property to the list make sure that no * other pool already added the same property. 
*/ found = B_FALSE; entry = *plp; while (entry != NULL) { if (entry->pl_user_prop != NULL && strcmp(propname, entry->pl_user_prop) == 0) { found = B_TRUE; break; } entry = entry->pl_next; } if (found) { free(propname); continue; } entry = zfs_alloc(hdl, sizeof (zprop_list_t)); entry->pl_prop = ZPROP_INVAL; entry->pl_user_prop = propname; entry->pl_width = strlen(entry->pl_user_prop); entry->pl_all = B_TRUE; *last = entry; last = &entry->pl_next; } for (entry = *plp; entry != NULL; entry = entry->pl_next) { if (entry->pl_fixed && !literal) continue; if (entry->pl_prop != ZPROP_INVAL && zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf), NULL, literal) == 0) { if (strlen(buf) > entry->pl_width) entry->pl_width = strlen(buf); } } return (0); } /* * Get the state for the given feature on the given ZFS pool. */ int zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf, size_t len) { uint64_t refcount; boolean_t found = B_FALSE; nvlist_t *features = zpool_get_features(zhp); boolean_t supported; const char *feature = strchr(propname, '@') + 1; supported = zpool_prop_feature(propname); ASSERT(supported || zpool_prop_unsupported(propname)); /* * Convert from feature name to feature guid. This conversion is * unnecessary for unsupported@... properties because they already * use guids. 
*/ if (supported) { int ret; spa_feature_t fid; ret = zfeature_lookup_name(feature, &fid); if (ret != 0) { (void) strlcpy(buf, "-", len); return (ENOTSUP); } feature = spa_feature_table[fid].fi_guid; } if (nvlist_lookup_uint64(features, feature, &refcount) == 0) found = B_TRUE; if (supported) { if (!found) { (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len); } else { if (refcount == 0) (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len); else (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len); } } else { if (found) { if (refcount == 0) { (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE); } else { (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY); } } else { (void) strlcpy(buf, "-", len); return (ENOTSUP); } } return (0); } /* * Validate the given pool name, optionally putting an extended error message in * 'buf'. */ boolean_t zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool) { namecheck_err_t why; char what; int ret; ret = pool_namecheck(pool, &why, &what); /* * The rules for reserved pool names were extended at a later point. * But we need to support users with existing pools that may now be * invalid. So we only check for this expanded set of names during a * create (or import), and only in userland. 
*/ if (ret == 0 && !isopen && (strncmp(pool, "mirror", 6) == 0 || strncmp(pool, "raidz", 5) == 0 || strncmp(pool, "draid", 5) == 0 || strncmp(pool, "spare", 5) == 0 || strcmp(pool, "log") == 0)) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); return (B_FALSE); } if (ret != 0) { if (hdl != NULL) { switch (why) { case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is too long")); break; case NAME_ERR_INVALCHAR: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character " "'%c' in pool name"), what); break; case NAME_ERR_NOLETTER: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name must begin with a letter")); break; case NAME_ERR_RESERVED: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); break; case NAME_ERR_DISKLIKE: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool name is reserved")); break; case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "leading slash in name")); break; case NAME_ERR_EMPTY_COMPONENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "empty component in name")); break; case NAME_ERR_TRAILING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "trailing slash in name")); break; case NAME_ERR_MULTIPLE_DELIMITERS: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "multiple '@' and/or '#' delimiters in " "name")); break; case NAME_ERR_NO_AT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "permission set is missing '@'")); break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "(%d) not defined"), why); break; } } return (B_FALSE); } return (B_TRUE); } /* * Open a handle to the given pool, even if the pool is currently in the FAULTED * state. */ zpool_handle_t * zpool_open_canfail(libzfs_handle_t *hdl, const char *pool) { zpool_handle_t *zhp; boolean_t missing; /* * Make sure the pool name is valid. 
*/ if (!zpool_name_valid(hdl, B_TRUE, pool)) { (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME, dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool); return (NULL); } if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) return (NULL); zhp->zpool_hdl = hdl; (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); if (zpool_refresh_stats(zhp, &missing) != 0) { zpool_close(zhp); return (NULL); } if (missing) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool")); (void) zfs_error_fmt(hdl, EZFS_NOENT, dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool); zpool_close(zhp); return (NULL); } return (zhp); } /* * Like the above, but silent on error. Used when iterating over pools (because * the configuration cache may be out of date). */ int zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret) { zpool_handle_t *zhp; boolean_t missing; if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) return (-1); zhp->zpool_hdl = hdl; (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); if (zpool_refresh_stats(zhp, &missing) != 0) { zpool_close(zhp); return (-1); } if (missing) { zpool_close(zhp); *ret = NULL; return (0); } *ret = zhp; return (0); } /* * Similar to zpool_open_canfail(), but refuses to open pools in the faulted * state. */ zpool_handle_t * zpool_open(libzfs_handle_t *hdl, const char *pool) { zpool_handle_t *zhp; if ((zhp = zpool_open_canfail(hdl, pool)) == NULL) return (NULL); if (zhp->zpool_state == POOL_STATE_UNAVAIL) { (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL, dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name); zpool_close(zhp); return (NULL); } return (zhp); } /* * Close the handle. Simply frees the memory associated with the handle. */ void zpool_close(zpool_handle_t *zhp) { nvlist_free(zhp->zpool_config); nvlist_free(zhp->zpool_old_config); nvlist_free(zhp->zpool_props); free(zhp); } /* * Return the name of the pool. 
*/ const char * zpool_get_name(zpool_handle_t *zhp) { return (zhp->zpool_name); } /* * Return the state of the pool (ACTIVE or UNAVAILABLE) */ int zpool_get_state(zpool_handle_t *zhp) { return (zhp->zpool_state); } /* * Check if vdev list contains a special vdev */ static boolean_t zpool_has_special_vdev(nvlist_t *nvroot) { nvlist_t **child; uint_t children; if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (uint_t c = 0; c < children; c++) { char *bias; if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) == 0 && strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0) { return (B_TRUE); } } } return (B_FALSE); } /* * Check if vdev list contains a dRAID vdev */ static boolean_t zpool_has_draid_vdev(nvlist_t *nvroot) { nvlist_t **child; uint_t children; if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (uint_t c = 0; c < children; c++) { char *type; if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type) == 0 && strcmp(type, VDEV_TYPE_DRAID) == 0) { return (B_TRUE); } } } return (B_FALSE); } /* * Output a dRAID top-level vdev name in to the provided buffer. */ static char * zpool_draid_name(char *name, int len, uint64_t data, uint64_t parity, uint64_t spares, uint64_t children) { snprintf(name, len, "%s%llu:%llud:%lluc:%llus", VDEV_TYPE_DRAID, (u_longlong_t)parity, (u_longlong_t)data, (u_longlong_t)children, (u_longlong_t)spares); return (name); } /* * Return B_TRUE if the provided name is a dRAID spare name. */ boolean_t zpool_is_draid_spare(const char *name) { uint64_t spare_id, parity, vdev_id; if (sscanf(name, VDEV_TYPE_DRAID "%llu-%llu-%llu", (u_longlong_t *)&parity, (u_longlong_t *)&vdev_id, (u_longlong_t *)&spare_id) == 3) { return (B_TRUE); } return (B_FALSE); } /* * Create the named pool, using the provided vdev list. It is assumed * that the consumer has already validated the contents of the nvlist, so we * don't have to worry about error semantics. 
*/
/*
 * 'props' (pool properties) and 'fsprops' (root dataset properties) are both
 * optional.  Validated copies are built here and passed to the
 * ZFS_IOC_POOL_CREATE ioctl; root dataset properties travel nested under
 * ZPOOL_ROOTFS_PROPS and any wrapping key material under ZPOOL_HIDDEN_ARGS.
 *
 * Returns 0 on success.  On failure the error is reported through 'hdl' and
 * a non-zero value is returned.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
	zfs_cmd_t zc = {"\0"};
	nvlist_t *zc_fsprops = NULL;
	nvlist_t *zc_props = NULL;
	nvlist_t *hidden_args = NULL;
	uint8_t *wkeydata = NULL;
	uint_t wkeylen = 0;
	char msg[1024];
	int ret = -1;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot create '%s'"), pool);

	if (!zpool_name_valid(hdl, B_FALSE, pool))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	if (props) {
		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
		    SPA_VERSION_1, flags, msg)) == NULL) {
			goto create_failed;
		}
	}

	if (fsprops) {
		uint64_t zoned;
		char *zonestr;

		zoned = ((nvlist_lookup_string(fsprops,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
		    strcmp(zonestr, "on") == 0);

		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
		    fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
			goto create_failed;
		}

		if (nvlist_exists(zc_fsprops,
		    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS)) &&
		    !zpool_has_special_vdev(nvroot)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "%s property requires a special vdev"),
			    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS));
			(void) zfs_error(hdl, EZFS_BADPROP, msg);
			goto create_failed;
		}

		/* The root dataset props are nested inside the pool props. */
		if (!zc_props &&
		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
			goto create_failed;
		}
		if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, B_TRUE,
		    &wkeydata, &wkeylen) != 0) {
			(void) zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
			goto create_failed;
		}
		if (nvlist_add_nvlist(zc_props,
		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
			goto create_failed;
		}
		if (wkeydata != NULL) {
			if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
				goto create_failed;

			if (nvlist_add_uint8_array(hidden_args, "wkeydata",
			    wkeydata, wkeylen) != 0)
				goto create_failed;

			if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
			    hidden_args) != 0)
				goto create_failed;
		}
	}

	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto create_failed;

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
		/*
		 * Latch errno before anything else runs.  Every call made
		 * while reporting the failure, and the cleanup at
		 * create_failed, may overwrite it.
		 */
		int err = errno;

		switch (err) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.  This can also happen under if the device is
			 * part of an active md or lvm device.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device, or "
			    "one of\nthe devices is part of an active md or "
			    "lvm device"));
			ret = zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case ERANGE:
			/*
			 * This happens if the record size is smaller or larger
			 * than the allowed size range, or not a power of 2.
			 *
			 * NOTE: although zfs_valid_proplist is called earlier,
			 * this case may have slipped through since the
			 * pool does not exist yet and it is therefore
			 * impossible to read properties e.g. max blocksize
			 * from the pool.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "record size invalid"));
			ret = zfs_error(hdl, EZFS_BADPROP, msg);
			break;

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicebytes(SPA_MINDEVSIZE, buf,
				    sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "one or more devices is less than the "
				    "minimum size (%s)"), buf);
			}
			ret = zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case ENOSPC:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is out of space"));
			ret = zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EINVAL:
			if (zpool_has_draid_vdev(nvroot) &&
			    zfeature_lookup_name("draid", NULL) != 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "dRAID vdevs are unsupported by the "
				    "kernel"));
				ret = zfs_error(hdl, EZFS_BADDEV, msg);
			} else {
				ret = zpool_standard_error(hdl, err, msg);
			}
			break;

		default:
			ret = zpool_standard_error(hdl, err, msg);
			break;
		}
	}

	/* Single cleanup path, shared by success and every failure. */
create_failed:
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(zc_fsprops);
	nvlist_free(hidden_args);
	free(wkeydata);
	return (ret);
}

/*
 * Destroy the given pool.  It is up to the caller to ensure that there are no
 * datasets left in the pool.
 *
 * 'log_str' is passed to the kernel through zc_history.  For an active pool
 * the root filesystem is opened first so that its mountpoint can be removed
 * once the pool has been destroyed.
 *
 * Returns 0 on success, -1 on failure.
 */
int
zpool_destroy(zpool_handle_t *zhp, const char *log_str)
{
	zfs_cmd_t zc = {"\0"};
	zfs_handle_t *zfp = NULL;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];

	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
	    (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_history = (uint64_t)(uintptr_t)log_str;

	if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
		/* Latch errno before formatting the message can change it. */
		int err = errno;

		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot destroy '%s'"), zhp->zpool_name);

		if (err == EROFS) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is read only"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
		} else {
			(void) zpool_standard_error(hdl, err, msg);
		}

		if (zfp)
			zfs_close(zfp);
		return (-1);
	}

	if (zfp) {
		remove_mountpoint(zfp);
		zfs_close(zfp);
	}

	return (0);
}

/*
 * Create a checkpoint in the given pool.
*/ int zpool_checkpoint(zpool_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; int error; error = lzc_pool_checkpoint(zhp->zpool_name); if (error != 0) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot checkpoint '%s'"), zhp->zpool_name); (void) zpool_standard_error(hdl, error, msg); return (-1); } return (0); } /* * Discard the checkpoint from the given pool. */ int zpool_discard_checkpoint(zpool_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; int error; error = lzc_pool_checkpoint_discard(zhp->zpool_name); if (error != 0) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot discard checkpoint in '%s'"), zhp->zpool_name); (void) zpool_standard_error(hdl, error, msg); return (-1); } return (0); } /* * Add the given vdevs to the pool. The caller must have already performed the * necessary verification to ensure that the vdev specification is well-formed. */ int zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) { zfs_cmd_t zc = {"\0"}; int ret; libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot add to '%s'"), zhp->zpool_name); if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < SPA_VERSION_SPARES && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " "upgraded to add hot spares")); return (zfs_error(hdl, EZFS_BADVERSION, msg)); } if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < SPA_VERSION_L2CACHE && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " "upgraded to add cache devices")); return (zfs_error(hdl, EZFS_BADVERSION, msg)); } if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) return (-1); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, 
ZFS_IOC_VDEV_ADD, &zc) != 0) { switch (errno) { case EBUSY: /* * This can happen if the user has specified the same * device multiple times. We can't reliably detect this * until we try to add it and see we already have a * label. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more vdevs refer to the same device")); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EINVAL: if (zpool_has_draid_vdev(nvroot) && zfeature_lookup_name("draid", NULL) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dRAID vdevs are unsupported by the " "kernel")); } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid config; a pool with removing/" "removed vdevs does not support adding " "raidz or dRAID vdevs")); } (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EOVERFLOW: /* * This occurs when one of the devices is below * SPA_MINDEVSIZE. Unfortunately, we can't detect which * device was the problem device since there's no * reliable way to determine device size from userland. */ { char buf[64]; zfs_nicebytes(SPA_MINDEVSIZE, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "device is less than the minimum " "size (%s)"), buf); } (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to add these vdevs")); (void) zfs_error(hdl, EZFS_BADVERSION, msg); break; default: (void) zpool_standard_error(hdl, errno, msg); } ret = -1; } else { ret = 0; } zcmd_free_nvlists(&zc); return (ret); } /* * Exports the pool from the system. The caller must ensure that there are no * mounted datasets in the pool. 
*/ static int zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, const char *log_str) { zfs_cmd_t zc = {"\0"}; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = force; zc.zc_guid = hardforce; zc.zc_history = (uint64_t)(uintptr_t)log_str; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) { switch (errno) { case EXDEV: zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, "use '-f' to override the following errors:\n" "'%s' has an active shared spare which could be" " used by other pools once '%s' is exported."), zhp->zpool_name, zhp->zpool_name); return (zfs_error_fmt(zhp->zpool_hdl, EZFS_ACTIVE_SPARE, dgettext(TEXT_DOMAIN, "cannot export '%s'"), zhp->zpool_name)); default: return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, dgettext(TEXT_DOMAIN, "cannot export '%s'"), zhp->zpool_name)); } } return (0); } int zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str) { return (zpool_export_common(zhp, force, B_FALSE, log_str)); } int zpool_export_force(zpool_handle_t *zhp, const char *log_str) { return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str)); } static void zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun, nvlist_t *config) { nvlist_t *nv = NULL; uint64_t rewindto; int64_t loss = -1; struct tm t; char timestr[128]; if (!hdl->libzfs_printerr || config == NULL) return; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) { return; } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) return; (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss); if (localtime_r((time_t *)&rewindto, &t) != NULL && strftime(timestr, 128, "%c", &t) != 0) { if (dryrun) { (void) printf(dgettext(TEXT_DOMAIN, "Would be able to return %s " "to its state as of %s.\n"), name, timestr); } else { (void) printf(dgettext(TEXT_DOMAIN, "Pool %s returned to its state as of 
%s.\n"), name, timestr); } if (loss > 120) { (void) printf(dgettext(TEXT_DOMAIN, "%s approximately %lld "), dryrun ? "Would discard" : "Discarded", ((longlong_t)loss + 30) / 60); (void) printf(dgettext(TEXT_DOMAIN, "minutes of transactions.\n")); } else if (loss > 0) { (void) printf(dgettext(TEXT_DOMAIN, "%s approximately %lld "), dryrun ? "Would discard" : "Discarded", (longlong_t)loss); (void) printf(dgettext(TEXT_DOMAIN, "seconds of transactions.\n")); } } } void zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason, nvlist_t *config) { nvlist_t *nv = NULL; int64_t loss = -1; uint64_t edata = UINT64_MAX; uint64_t rewindto; struct tm t; char timestr[128]; if (!hdl->libzfs_printerr) return; if (reason >= 0) (void) printf(dgettext(TEXT_DOMAIN, "action: ")); else (void) printf(dgettext(TEXT_DOMAIN, "\t")); /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 || nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) goto no_info; (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS, &edata); (void) printf(dgettext(TEXT_DOMAIN, "Recovery is possible, but will result in some data loss.\n")); if (localtime_r((time_t *)&rewindto, &t) != NULL && strftime(timestr, 128, "%c", &t) != 0) { (void) printf(dgettext(TEXT_DOMAIN, "\tReturning the pool to its state as of %s\n" "\tshould correct the problem. "), timestr); } else { (void) printf(dgettext(TEXT_DOMAIN, "\tReverting the pool to an earlier state " "should correct the problem.\n\t")); } if (loss > 120) { (void) printf(dgettext(TEXT_DOMAIN, "Approximately %lld minutes of data\n" "\tmust be discarded, irreversibly. 
"), ((longlong_t)loss + 30) / 60); } else if (loss > 0) { (void) printf(dgettext(TEXT_DOMAIN, "Approximately %lld seconds of data\n" "\tmust be discarded, irreversibly. "), (longlong_t)loss); } if (edata != 0 && edata != UINT64_MAX) { if (edata == 1) { (void) printf(dgettext(TEXT_DOMAIN, "After rewind, at least\n" "\tone persistent user-data error will remain. ")); } else { (void) printf(dgettext(TEXT_DOMAIN, "After rewind, several\n" "\tpersistent user-data errors will remain. ")); } } (void) printf(dgettext(TEXT_DOMAIN, "Recovery can be attempted\n\tby executing 'zpool %s -F %s'. "), reason >= 0 ? "clear" : "import", name); (void) printf(dgettext(TEXT_DOMAIN, "A scrub of the pool\n" "\tis strongly recommended after recovery.\n")); return; no_info: (void) printf(dgettext(TEXT_DOMAIN, "Destroy and re-create the pool from\n\ta backup source.\n")); } /* * zpool_import() is a contracted interface. Should be kept the same * if possible. * * Applications should use zpool_import_props() to import a pool with * new properties value to be set. 
*/ int zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, char *altroot) { nvlist_t *props = NULL; int ret; if (altroot != NULL) { if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { return (zfs_error_fmt(hdl, EZFS_NOMEM, dgettext(TEXT_DOMAIN, "cannot import '%s'"), newname)); } if (nvlist_add_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 || nvlist_add_string(props, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) { nvlist_free(props); return (zfs_error_fmt(hdl, EZFS_NOMEM, dgettext(TEXT_DOMAIN, "cannot import '%s'"), newname)); } } ret = zpool_import_props(hdl, config, newname, props, ZFS_IMPORT_NORMAL); nvlist_free(props); return (ret); } static void print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv, int indent) { nvlist_t **child; uint_t c, children; char *vname; uint64_t is_log = 0; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); if (name != NULL) (void) printf("\t%*s%s%s\n", indent, "", name, is_log ? " [log]" : ""); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; for (c = 0; c < children; c++) { vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID); print_vdev_tree(hdl, vname, child[c], indent + 2); free(vname); } } void zpool_print_unsup_feat(nvlist_t *config) { nvlist_t *nvinfo, *unsup_feat; nvpair_t *nvp; verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0); verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT, &unsup_feat) == 0); for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL; nvp = nvlist_next_nvpair(unsup_feat, nvp)) { char *desc; verify(nvpair_type(nvp) == DATA_TYPE_STRING); verify(nvpair_value_string(nvp, &desc) == 0); if (strlen(desc) > 0) (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc); else (void) printf("\t%s\n", nvpair_name(nvp)); } } /* * Import the given pool using the known configuration and a list of * properties to be set. 
The configuration should have come from * zpool_find_import(). The 'newname' parameters control whether the pool * is imported with a different name. */ int zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, nvlist_t *props, int flags) { zfs_cmd_t zc = {"\0"}; zpool_load_policy_t policy; nvlist_t *nv = NULL; nvlist_t *nvinfo = NULL; nvlist_t *missing = NULL; char *thename; char *origname; int ret; int error = 0; char errbuf[1024]; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &origname) == 0); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot import pool '%s'"), origname); if (newname != NULL) { if (!zpool_name_valid(hdl, B_FALSE, newname)) return (zfs_error_fmt(hdl, EZFS_INVALIDNAME, dgettext(TEXT_DOMAIN, "cannot import '%s'"), newname)); thename = (char *)newname; } else { thename = origname; } if (props != NULL) { uint64_t version; prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE }; verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); if ((props = zpool_valid_proplist(hdl, origname, props, version, flags, errbuf)) == NULL) return (-1); if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { nvlist_free(props); return (-1); } nvlist_free(props); } (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name)); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &zc.zc_guid) == 0); if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) { zcmd_free_nvlists(&zc); return (-1); } if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) { zcmd_free_nvlists(&zc); return (-1); } zc.zc_cookie = flags; while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 && errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { zcmd_free_nvlists(&zc); return (-1); } } if (ret != 0) error = errno; (void) zcmd_read_dst_nvlist(hdl, &zc, &nv); zcmd_free_nvlists(&zc); zpool_get_load_policy(config, &policy); if (error) { char desc[1024]; char aux[256]; /* * Dry-run failed, but 
we print out what success * looks like if we found a best txg */ if (policy.zlp_rewind & ZPOOL_TRY_REWIND) { zpool_rewind_exclaim(hdl, newname ? origname : thename, B_TRUE, nv); nvlist_free(nv); return (-1); } if (newname == NULL) (void) snprintf(desc, sizeof (desc), dgettext(TEXT_DOMAIN, "cannot import '%s'"), thename); else (void) snprintf(desc, sizeof (desc), dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"), origname, thename); switch (error) { case ENOTSUP: if (nv != NULL && nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 && nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) { (void) printf(dgettext(TEXT_DOMAIN, "This " "pool uses the following feature(s) not " "supported by this system:\n")); zpool_print_unsup_feat(nv); if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY)) { (void) printf(dgettext(TEXT_DOMAIN, "All unsupported features are only " "required for writing to the pool." "\nThe pool can be imported using " "'-o readonly=on'.\n")); } } /* * Unsupported version. */ (void) zfs_error(hdl, EZFS_BADVERSION, desc); break; case EREMOTEIO: if (nv != NULL && nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) { char *hostname = ""; uint64_t hostid = 0; mmp_state_t mmp_state; mmp_state = fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_STATE); if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME)) hostname = fnvlist_lookup_string(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME); if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID)) hostid = fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_HOSTID); if (mmp_state == MMP_STATE_ACTIVE) { (void) snprintf(aux, sizeof (aux), dgettext(TEXT_DOMAIN, "pool is imp" "orted on host '%s' (hostid=%lx).\n" "Export the pool on the other " "system, then run 'zpool import'."), hostname, (unsigned long) hostid); } else if (mmp_state == MMP_STATE_NO_HOSTID) { (void) snprintf(aux, sizeof (aux), dgettext(TEXT_DOMAIN, "pool has " "the multihost property on and " "the\nsystem's hostid is not set. 
" "Set a unique system hostid with " "the zgenhostid(8) command.\n")); } (void) zfs_error_aux(hdl, "%s", aux); } (void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc); break; case EINVAL: (void) zfs_error(hdl, EZFS_INVALCONFIG, desc); break; case EROFS: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is read only")); (void) zfs_error(hdl, EZFS_BADDEV, desc); break; case ENXIO: if (nv && nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 && nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) { (void) printf(dgettext(TEXT_DOMAIN, "The devices below are missing or " "corrupted, use '-m' to import the pool " "anyway:\n")); print_vdev_tree(hdl, NULL, missing, 2); (void) printf("\n"); } (void) zpool_standard_error(hdl, error, desc); break; case EEXIST: (void) zpool_standard_error(hdl, error, desc); break; case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices are already in use\n")); (void) zfs_error(hdl, EZFS_BADDEV, desc); break; case ENAMETOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "new name of at least one dataset is longer than " "the maximum allowable length")); (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc); break; default: (void) zpool_standard_error(hdl, error, desc); zpool_explain_recover(hdl, newname ? origname : thename, -error, nv); break; } nvlist_free(nv); ret = -1; } else { zpool_handle_t *zhp; /* * This should never fail, but play it safe anyway. */ if (zpool_open_silent(hdl, thename, &zhp) != 0) ret = -1; else if (zhp != NULL) zpool_close(zhp); if (policy.zlp_rewind & (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { zpool_rewind_exclaim(hdl, newname ? origname : thename, ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv); } nvlist_free(nv); return (0); } return (ret); } /* * Translate vdev names to guids. If a vdev_path is determined to be * unsuitable then a vd_errlist is allocated and the vdev path and errno * are added to it. 
*/ static int zpool_translate_vdev_guids(zpool_handle_t *zhp, nvlist_t *vds, nvlist_t *vdev_guids, nvlist_t *guids_to_paths, nvlist_t **vd_errlist) { nvlist_t *errlist = NULL; int error = 0; for (nvpair_t *elem = nvlist_next_nvpair(vds, NULL); elem != NULL; elem = nvlist_next_nvpair(vds, elem)) { boolean_t spare, cache; char *vd_path = nvpair_name(elem); nvlist_t *tgt = zpool_find_vdev(zhp, vd_path, &spare, &cache, NULL); if ((tgt == NULL) || cache || spare) { if (errlist == NULL) { errlist = fnvlist_alloc(); error = EINVAL; } uint64_t err = (tgt == NULL) ? EZFS_NODEVICE : (spare ? EZFS_ISSPARE : EZFS_ISL2CACHE); fnvlist_add_int64(errlist, vd_path, err); continue; } uint64_t guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); fnvlist_add_uint64(vdev_guids, vd_path, guid); char msg[MAXNAMELEN]; (void) snprintf(msg, sizeof (msg), "%llu", (u_longlong_t)guid); fnvlist_add_string(guids_to_paths, msg, vd_path); } if (error != 0) { verify(errlist != NULL); if (vd_errlist != NULL) *vd_errlist = errlist; else fnvlist_free(errlist); } return (error); } static int xlate_init_err(int err) { switch (err) { case ENODEV: return (EZFS_NODEVICE); case EINVAL: case EROFS: return (EZFS_BADDEV); case EBUSY: return (EZFS_INITIALIZING); case ESRCH: return (EZFS_NO_INITIALIZE); } return (err); } /* * Begin, suspend, cancel, or uninit (clear) the initialization (initializing * of all free blocks) for the given vdevs in the given pool. 
*/ static int zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type, nvlist_t *vds, boolean_t wait) { int err; nvlist_t *vdev_guids = fnvlist_alloc(); nvlist_t *guids_to_paths = fnvlist_alloc(); nvlist_t *vd_errlist = NULL; nvlist_t *errlist; nvpair_t *elem; err = zpool_translate_vdev_guids(zhp, vds, vdev_guids, guids_to_paths, &vd_errlist); if (err != 0) { verify(vd_errlist != NULL); goto list_errors; } err = lzc_initialize(zhp->zpool_name, cmd_type, vdev_guids, &errlist); if (err != 0) { if (errlist != NULL && nvlist_lookup_nvlist(errlist, ZPOOL_INITIALIZE_VDEVS, &vd_errlist) == 0) { goto list_errors; } if (err == EINVAL && cmd_type == POOL_INITIALIZE_UNINIT) { zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, "uninitialize is not supported by kernel")); } (void) zpool_standard_error(zhp->zpool_hdl, err, dgettext(TEXT_DOMAIN, "operation failed")); goto out; } if (wait) { for (elem = nvlist_next_nvpair(vdev_guids, NULL); elem != NULL; elem = nvlist_next_nvpair(vdev_guids, elem)) { uint64_t guid = fnvpair_value_uint64(elem); err = lzc_wait_tag(zhp->zpool_name, ZPOOL_WAIT_INITIALIZE, guid, NULL); if (err != 0) { (void) zpool_standard_error_fmt(zhp->zpool_hdl, err, dgettext(TEXT_DOMAIN, "error " "waiting for '%s' to initialize"), nvpair_name(elem)); goto out; } } } goto out; list_errors: for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL; elem = nvlist_next_nvpair(vd_errlist, elem)) { int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem)); char *path; if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem), &path) != 0) path = nvpair_name(elem); (void) zfs_error_fmt(zhp->zpool_hdl, vd_error, "cannot initialize '%s'", path); } out: fnvlist_free(vdev_guids); fnvlist_free(guids_to_paths); if (vd_errlist != NULL) fnvlist_free(vd_errlist); return (err == 0 ? 
0 : -1); } int zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type, nvlist_t *vds) { return (zpool_initialize_impl(zhp, cmd_type, vds, B_FALSE)); } int zpool_initialize_wait(zpool_handle_t *zhp, pool_initialize_func_t cmd_type, nvlist_t *vds) { return (zpool_initialize_impl(zhp, cmd_type, vds, B_TRUE)); } static int xlate_trim_err(int err) { switch (err) { case ENODEV: return (EZFS_NODEVICE); case EINVAL: case EROFS: return (EZFS_BADDEV); case EBUSY: return (EZFS_TRIMMING); case ESRCH: return (EZFS_NO_TRIM); case EOPNOTSUPP: return (EZFS_TRIM_NOTSUP); } return (err); } static int zpool_trim_wait(zpool_handle_t *zhp, nvlist_t *vdev_guids) { int err; nvpair_t *elem; for (elem = nvlist_next_nvpair(vdev_guids, NULL); elem != NULL; elem = nvlist_next_nvpair(vdev_guids, elem)) { uint64_t guid = fnvpair_value_uint64(elem); err = lzc_wait_tag(zhp->zpool_name, ZPOOL_WAIT_TRIM, guid, NULL); if (err != 0) { (void) zpool_standard_error_fmt(zhp->zpool_hdl, err, dgettext(TEXT_DOMAIN, "error " "waiting to trim '%s'"), nvpair_name(elem)); return (err); } } return (0); } /* * Check errlist and report any errors, omitting ones which should be * suppressed. Returns B_TRUE if any errors were reported. */ static boolean_t check_trim_errs(zpool_handle_t *zhp, trimflags_t *trim_flags, nvlist_t *guids_to_paths, nvlist_t *vds, nvlist_t *errlist) { nvpair_t *elem; boolean_t reported_errs = B_FALSE; int num_vds = 0; int num_suppressed_errs = 0; for (elem = nvlist_next_nvpair(vds, NULL); elem != NULL; elem = nvlist_next_nvpair(vds, elem)) { num_vds++; } for (elem = nvlist_next_nvpair(errlist, NULL); elem != NULL; elem = nvlist_next_nvpair(errlist, elem)) { int64_t vd_error = xlate_trim_err(fnvpair_value_int64(elem)); char *path; /* * If only the pool was specified, and it was not a secure * trim then suppress warnings for individual vdevs which * do not support trimming. 
*/ if (vd_error == EZFS_TRIM_NOTSUP && trim_flags->fullpool && !trim_flags->secure) { num_suppressed_errs++; continue; } reported_errs = B_TRUE; if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem), &path) != 0) path = nvpair_name(elem); (void) zfs_error_fmt(zhp->zpool_hdl, vd_error, "cannot trim '%s'", path); } if (num_suppressed_errs == num_vds) { (void) zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, "no devices in pool support trim operations")); (void) (zfs_error(zhp->zpool_hdl, EZFS_TRIM_NOTSUP, dgettext(TEXT_DOMAIN, "cannot trim"))); reported_errs = B_TRUE; } return (reported_errs); } /* * Begin, suspend, or cancel the TRIM (discarding of all free blocks) for * the given vdevs in the given pool. */ int zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds, trimflags_t *trim_flags) { int err; int retval = 0; nvlist_t *vdev_guids = fnvlist_alloc(); nvlist_t *guids_to_paths = fnvlist_alloc(); nvlist_t *errlist = NULL; err = zpool_translate_vdev_guids(zhp, vds, vdev_guids, guids_to_paths, &errlist); if (err != 0) { check_trim_errs(zhp, trim_flags, guids_to_paths, vds, errlist); retval = -1; goto out; } err = lzc_trim(zhp->zpool_name, cmd_type, trim_flags->rate, trim_flags->secure, vdev_guids, &errlist); if (err != 0) { nvlist_t *vd_errlist; if (errlist != NULL && nvlist_lookup_nvlist(errlist, ZPOOL_TRIM_VDEVS, &vd_errlist) == 0) { if (check_trim_errs(zhp, trim_flags, guids_to_paths, vds, vd_errlist)) { retval = -1; goto out; } } else { char msg[1024]; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "operation failed")); zpool_standard_error(zhp->zpool_hdl, err, msg); retval = -1; goto out; } } if (trim_flags->wait) retval = zpool_trim_wait(zhp, vdev_guids); out: if (errlist != NULL) fnvlist_free(errlist); fnvlist_free(vdev_guids); fnvlist_free(guids_to_paths); return (retval); } /* * Scan the pool. 
*/ int zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) { zfs_cmd_t zc = {"\0"}; char msg[1024]; int err; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = func; zc.zc_flags = cmd; if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0) return (0); err = errno; /* ECANCELED on a scrub means we resumed a paused scrub. ENOENT for any request other than POOL_SCAN_NONE issued with POOL_SCRUB_NORMAL is likewise returned as success. */ if (err == ECANCELED && func == POOL_SCAN_SCRUB && cmd == POOL_SCRUB_NORMAL) return (0); if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL) return (0); if (func == POOL_SCAN_SCRUB) { if (cmd == POOL_SCRUB_PAUSE) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot pause scrubbing %s"), zc.zc_name); } else { assert(cmd == POOL_SCRUB_NORMAL); (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name); } } else if (func == POOL_SCAN_RESILVER) { assert(cmd == POOL_SCRUB_NORMAL); (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot restart resilver on %s"), zc.zc_name); } else if (func == POOL_SCAN_NONE) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"), zc.zc_name); } else { assert(!"unexpected result"); } if (err == EBUSY) { nvlist_t *nvroot; pool_scan_stat_t *ps = NULL; uint_t psc; verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc); if (ps && ps->pss_func == POOL_SCAN_SCRUB && ps->pss_state == DSS_SCANNING) { if (cmd == POOL_SCRUB_PAUSE) return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg)); else return (zfs_error(hdl, EZFS_SCRUBBING, msg)); } else { return (zfs_error(hdl, EZFS_RESILVERING, msg)); } } else if (err == ENOENT) { return (zfs_error(hdl, EZFS_NO_SCRUB, msg)); } else if (err == ENOTSUP && func == POOL_SCAN_RESILVER) { return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, msg)); } else { return (zpool_standard_error(hdl, err, 
msg)); } } /* * Find a vdev that matches the search criteria specified. We use the * nvpair name to determine how we should look for the device. * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL * spare; but FALSE if it's an INUSE spare. 'l2cache' is set to TRUE when * the match is found under ZPOOL_CONFIG_L2CACHE, and 'log' (when non-NULL) * when the matching child has ZPOOL_CONFIG_IS_LOG set. Returns the matching * vdev nvlist, or NULL if nothing matches. */ static nvlist_t * vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { uint_t c, children; nvlist_t **child; nvlist_t *ret; uint64_t is_log; char *srchkey; nvpair_t *pair = nvlist_next_nvpair(search, NULL); /* Nothing to look for */ if (search == NULL || pair == NULL) return (NULL); /* Obtain the key we will use to search */ srchkey = nvpair_name(pair); switch (nvpair_type(pair)) { case DATA_TYPE_UINT64: if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) { uint64_t srchval, theguid; verify(nvpair_value_uint64(pair, &srchval) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0); if (theguid == srchval) return (nv); } break; case DATA_TYPE_STRING: { char *srchval, *val; verify(nvpair_value_string(pair, &srchval) == 0); if (nvlist_lookup_string(nv, srchkey, &val) != 0) break; /* * Search for the requested value. Special cases: * * - ZPOOL_CONFIG_PATH for whole disk entries. These end in * "-part1", or "p1". The suffix is hidden from the user, * but included in the string, so this matches around it. * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname() * is used to check all possible expanded paths. * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE). * * Otherwise, all other searches are simple string compares. 
*/ if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) { uint64_t wholedisk = 0; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0) return (nv); } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) { char *type, *idx, *end, *p; uint64_t id, vdev_id; /* * Determine our vdev type, keeping in mind * that the srchval is composed of a type and * vdev id pair (i.e. mirror-4). */ if ((type = strdup(srchval)) == NULL) return (NULL); if ((p = strrchr(type, '-')) == NULL) { free(type); break; } idx = p + 1; *p = '\0'; /* * If the types don't match then keep looking. */ if (strncmp(val, type, strlen(val)) != 0) { free(type); break; } verify(zpool_vdev_is_interior(type)); verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &id) == 0); errno = 0; vdev_id = strtoull(idx, &end, 10); /* * If we are looking for a raidz and a parity is * specified, make sure it matches. */ int rzlen = strlen(VDEV_TYPE_RAIDZ); assert(rzlen == strlen(VDEV_TYPE_DRAID)); int typlen = strlen(type); if ((strncmp(type, VDEV_TYPE_RAIDZ, rzlen) == 0 || strncmp(type, VDEV_TYPE_DRAID, rzlen) == 0) && typlen != rzlen) { uint64_t vdev_parity; int parity = *(type + rzlen) - '0'; if (parity <= 0 || parity > 3 || (typlen - rzlen) != 1) { /* * Nonsense parity specified, can * never match */ free(type); return (NULL); } verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &vdev_parity) == 0); if ((int)vdev_parity != parity) { free(type); break; } } free(type); if (errno != 0) return (NULL); /* * Now verify that we have the correct vdev id. 
*/ if (vdev_id == id) return (nv); } /* * Common case */ if (strcmp(srchval, val) == 0) return (nv); break; } default: break; } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return (NULL); for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { /* * The 'is_log' value is only set for the toplevel * vdev, not the leaf vdevs. So we always lookup the * log device from the root of the vdev tree (where * 'log' is non-NULL). */ if (log != NULL && nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log) == 0 && is_log) { *log = B_TRUE; } return (ret); } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { *avail_spare = B_TRUE; return (ret); } } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { *l2cache = B_TRUE; return (ret); } } } return (NULL); } /* * Given a physical path or guid, find the associated vdev. 
*/ nvlist_t * zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { nvlist_t *search, *nvroot, *ret; uint64_t guid; char *end; verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); guid = strtoull(ppath, &end, 0); if (guid != 0 && *end == '\0') { verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); } else { verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0); } verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); *avail_spare = B_FALSE; *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); nvlist_free(search); return (ret); } /* * Determine if we have an "interior" top-level vdev (i.e mirror/raidz). */ static boolean_t zpool_vdev_is_interior(const char *name) { if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 || strncmp(name, VDEV_TYPE_SPARE, strlen(VDEV_TYPE_SPARE)) == 0 || strncmp(name, VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 || strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0) return (B_TRUE); if (strncmp(name, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) == 0 && !zpool_is_draid_spare(name)) return (B_TRUE); return (B_FALSE); } +/* + * Lookup the nvlist for a given vdev. 
+ */ nvlist_t * zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { char *end; nvlist_t *nvroot, *search, *ret; uint64_t guid; + boolean_t __avail_spare, __l2cache, __log; verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); guid = strtoull(path, &end, 0); if (guid != 0 && *end == '\0') { verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); } else if (zpool_vdev_is_interior(path)) { verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0); } else { verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0); } verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + /* + * User can pass NULL for avail_spare, l2cache, and log, but + * we still need to provide variables to vdev_to_nvlist_iter(), so + * just point them to junk variables here. After this point none of + * the three pointers is NULL. + * + * NOTE(review): identifiers beginning with "__" are reserved for the + * implementation in C; consider renaming these locals. + */ + if (!avail_spare) + avail_spare = &__avail_spare; + if (!l2cache) + l2cache = &__l2cache; + if (!log) + log = &__log; + *avail_spare = B_FALSE; *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); nvlist_free(search); return (ret); } static int vdev_is_online(nvlist_t *nv) { uint64_t ival; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) return (0); return (1); } /* * Helper function for zpool_get_physpaths(). * * Appends the ZPOOL_CONFIG_PHYS_PATH string of 'config' to 'physpath' at * offset *bytes_written, separated from any earlier entry by a space, and * adds the formatted length to *bytes_written. Returns EZFS_NODEVICE when * the config has no physical path, EZFS_NOSPC when the entry did not fit * (the partial entry is cleared), and 0 otherwise. */ static int vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size, size_t *bytes_written) { size_t bytes_left, pos, rsz; char *tmppath; const char *format; if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0) return (EZFS_NODEVICE); pos = *bytes_written; bytes_left = physpath_size - pos; format = (pos == 0) ? 
"%s" : " %s"; rsz = snprintf(physpath + pos, bytes_left, format, tmppath); *bytes_written += rsz; if (rsz >= bytes_left) { /* if physpath was not copied properly, clear it */ if (bytes_left != 0) { physpath[pos] = 0; } return (EZFS_NOSPC); } return (0); } static int vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size, size_t *rsz, boolean_t is_spare) { char *type; int ret; if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) return (EZFS_INVALCONFIG); if (strcmp(type, VDEV_TYPE_DISK) == 0) { /* * An active spare device has ZPOOL_CONFIG_IS_SPARE set. * For a spare vdev, we only want to boot from the active * spare device. */ if (is_spare) { uint64_t spare = 0; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE, &spare); if (!spare) return (EZFS_INVALCONFIG); } if (vdev_is_online(nv)) { if ((ret = vdev_get_one_physpath(nv, physpath, phypath_size, rsz)) != 0) return (ret); } } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || strcmp(type, VDEV_TYPE_RAIDZ) == 0 || strcmp(type, VDEV_TYPE_REPLACING) == 0 || (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) { nvlist_t **child; uint_t count; int i, ret; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &count) != 0) return (EZFS_INVALCONFIG); for (i = 0; i < count; i++) { ret = vdev_get_physpaths(child[i], physpath, phypath_size, rsz, is_spare); if (ret == EZFS_NOSPC) return (ret); } } return (EZFS_POOL_INVALARG); } /* * Get phys_path for a root pool config. * Return 0 on success; non-zero on failure. 
*/ static int zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size) { size_t rsz; nvlist_t *vdev_root; nvlist_t **child; uint_t count; char *type; rsz = 0; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev_root) != 0) return (EZFS_INVALCONFIG); if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 || nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN, &child, &count) != 0) return (EZFS_INVALCONFIG); /* * root pool can only have a single top-level vdev. */ if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1) return (EZFS_POOL_INVALARG); (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz, B_FALSE); /* No online devices */ if (rsz == 0) return (EZFS_NODEVICE); return (0); } /* * Get phys_path for a root pool * Return 0 on success; non-zero on failure. */ int zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size) { return (zpool_get_config_physpath(zhp->zpool_config, physpath, phypath_size)); } /* * Convert a vdev path to a GUID. Returns GUID or 0 on error. * * If is_spare, is_l2cache, or is_log is non-NULL, then store within it * if the VDEV is a spare, l2cache, or log device. If they're NULL then * ignore them. */ static uint64_t zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path, boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log) { uint64_t guid; boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE; nvlist_t *tgt; if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache, &log)) == NULL) return (0); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0); if (is_spare != NULL) *is_spare = spare; if (is_l2cache != NULL) *is_l2cache = l2cache; if (is_log != NULL) *is_log = log; return (guid); } /* Convert a vdev path to a GUID. Returns GUID or 0 on error. 
*/ uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path) { return (zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL)); } /* * Bring the specified vdev online. The 'flags' parameter is a set of the * ZFS_ONLINE_* flags. On success the state handed back by the ioctl in * zc_cookie is stored in *newstate and 0 is returned. */ int zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, vdev_state_t *newstate) { zfs_cmd_t zc = {"\0"}; char msg[1024]; char *pathname; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; libzfs_handle_t *hdl = zhp->zpool_hdl; int error; if (flags & ZFS_ONLINE_EXPAND) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot expand %s"), path); } else { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot online %s"), path); } (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, &islog)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); if (!(flags & ZFS_ONLINE_SPARE) && avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); if ((flags & ZFS_ONLINE_EXPAND || zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) && nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) { uint64_t wholedisk = 0; (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); /* * XXX - L2ARC 1.0 devices can't support expansion. 
*/ if (l2cache) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot expand cache devices")); return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg)); } if (wholedisk) { const char *fullpath = path; char buf[MAXPATHLEN]; if (path[0] != '/') { error = zfs_resolve_shortname(path, buf, sizeof (buf)); if (error != 0) return (zfs_error(hdl, EZFS_NODEVICE, msg)); fullpath = buf; } error = zpool_relabel_disk(hdl, fullpath, msg); if (error != 0) return (error); } } zc.zc_cookie = VDEV_STATE_ONLINE; zc.zc_obj = flags; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) { if (errno == EINVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split " "from this pool into a new one. Use '%s' " "instead"), "zpool detach"); return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg)); } return (zpool_standard_error(hdl, errno, msg)); } *newstate = zc.zc_cookie; return (0); } /* * Take the specified vdev offline. When 'istmp' is set the request * carries ZFS_OFFLINE_TEMPORARY. Available spares are rejected with * EZFS_ISSPARE. */ int zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) { zfs_cmd_t zc = {"\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot offline %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, NULL)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); zc.zc_cookie = VDEV_STATE_OFFLINE; zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); switch (errno) { case EBUSY: /* * There are no other replicas of this device. 
*/ return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); case EEXIST: /* * The log device has unplayed logs */ return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg)); default: return (zpool_standard_error(hdl, errno, msg)); } } /* * Remove the specified vdev asynchronously from the configuration, so * that it may come ONLINE if reinserted. This is called from zed on * Udev remove event. * Note: We also have a similar function zpool_vdev_remove() that * removes the vdev from the pool. */ int zpool_vdev_remove_wanted(zpool_handle_t *zhp, const char *path) { zfs_cmd_t zc = {"\0"}; char errbuf[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot remove %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, NULL)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, errbuf)); zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); zc.zc_cookie = VDEV_STATE_REMOVED; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, errbuf)); } /* * Mark the given vdev faulted. 'aux' is passed to the kernel in zc_obj. * EBUSY from the ioctl is reported as EZFS_NOREPLICAS. */ int zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) { zfs_cmd_t zc = {"\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; zc.zc_cookie = VDEV_STATE_FAULTED; zc.zc_obj = aux; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); switch (errno) { case EBUSY: /* * There are no other replicas of this device. */ return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); default: return (zpool_standard_error(hdl, errno, msg)); } } /* - * Mark the given vdev degraded. 
+ * Generic set vdev state function */ -int -zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +static int +zpool_vdev_set_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux, + vdev_state_t state) { zfs_cmd_t zc = {"\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid); + dgettext(TEXT_DOMAIN, "cannot set %s %llu"), + zpool_state_to_name(state, aux), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; - zc.zc_cookie = VDEV_STATE_DEGRADED; + zc.zc_cookie = state; zc.zc_obj = aux; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } +/* + * Mark the given vdev degraded by requesting VDEV_STATE_DEGRADED through + * zpool_vdev_set_state(); 'guid' and 'aux' are passed along unchanged. + */ +int +zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_DEGRADED)); +} + +/* + * Mark the given vdev as in a removed state (as if the device does not exist). + * + * This is different from zpool_vdev_remove() which does a removal of a device + * from the pool (but the device does exist). + */ +int +zpool_vdev_set_removed_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_REMOVED)); +} + /* * Returns TRUE if the given nvlist is a vdev that was originally swapped in as * a hot spare. 
*/ static boolean_t is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which) { nvlist_t **child; uint_t c, children; char *type; if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, &type) == 0); if ((strcmp(type, VDEV_TYPE_SPARE) == 0 || strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0) && children == 2 && child[which] == tgt) return (B_TRUE); for (c = 0; c < children; c++) if (is_replacing_spare(child[c], tgt, which)) return (B_TRUE); } return (B_FALSE); } /* * Attach new_disk (fully described by nvroot) to old_disk. * If 'replacing' is specified, the new disk will replace the old one. * If 'rebuild' is set it is passed to the kernel in zc_simple, and the * request is refused with EZFS_POOL_NOTSUP when the * org.openzfs:device_rebuild feature lookup fails. 'nvroot' must contain * exactly one child. Returns 0 on success; failures are reported through * the libzfs handle. */ int zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild) { zfs_cmd_t zc = {"\0"}; char msg[1024]; int ret; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; uint64_t val; char *newname; nvlist_t **child; uint_t children; nvlist_t *config_root; libzfs_handle_t *hdl = zhp->zpool_hdl; if (replacing) (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot replace %s with %s"), old_disk, new_disk); else (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot attach %s to %s"), new_disk, old_disk); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache, &islog)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, msg)); if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); if (l2cache) return (zfs_error(hdl, EZFS_ISL2CACHE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); zc.zc_cookie = replacing; zc.zc_simple = rebuild; if (rebuild && zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs module doesn't support device rebuilds")); return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg)); } if (nvlist_lookup_nvlist_array(nvroot, 
ZPOOL_CONFIG_CHILDREN, &child, &children) != 0 || children != 1) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "new device must be a single disk")); return (zfs_error(hdl, EZFS_INVALCONFIG, msg)); } verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0); if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL) return (-1); /* * If the target is a hot spare that has been swapped in, we can only * replace it with another hot spare. */ if (replacing && nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 && (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache, NULL) == NULL || !avail_spare) && is_replacing_spare(config_root, tgt, 1)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "can only be replaced by another hot spare")); free(newname); return (zfs_error(hdl, EZFS_BADTARGET, msg)); } free(newname); if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) return (-1); ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc); zcmd_free_nvlists(&zc); if (ret == 0) return (0); switch (errno) { case ENOTSUP: /* * Can't attach to or replace this type of vdev. 
*/ if (replacing) { uint64_t version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if (islog) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot replace a log with a spare")); } else if (rebuild) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only mirror and dRAID vdevs support " "sequential reconstruction")); } else if (zpool_is_draid_spare(new_disk)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dRAID spares can only replace child " "devices in their parent's dRAID vdev")); } else if (version >= SPA_VERSION_MULTI_REPLACE) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "already in replacing/spare config; wait " "for completion or use 'zpool detach'")); } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot replace a replacing device")); } } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "can only attach to mirrors and top-level " "disks")); } (void) zfs_error(hdl, EZFS_BADTARGET, msg); break; case EINVAL: /* * The new device must be a single disk. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "new device must be a single disk")); (void) zfs_error(hdl, EZFS_INVALCONFIG, msg); break; case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, " "or device removal is in progress"), new_disk); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EOVERFLOW: /* * The new device is too small. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "device is too small")); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EDOM: /* * The new device has a different optimal sector size. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "new device has a different optimal sector size; use the " "option '-o ashift=N' to override the optimal size")); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case ENAMETOOLONG: /* * The resulting top-level vdev spec won't fit in the label. */ (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg); break; default: (void) zpool_standard_error(hdl, errno, msg); } return (-1); } /* * Detach the specified device. 
*/
int
zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
{
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	boolean_t is_avail_spare, is_l2cache;
	nvlist_t *vdev;
	int ioc_errno;

	/* Build the message prefix used by every error path below. */
	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	vdev = zpool_find_vdev(zhp, path, &is_avail_spare, &is_l2cache, NULL);
	if (vdev == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	/* Available spares and cache devices cannot be detached. */
	if (is_avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));
	if (is_l2cache)
		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));

	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
	    &zc.zc_guid) == 0);

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
		return (0);

	/* Capture errno from the failed ioctl before reporting. */
	ioc_errno = errno;

	if (ioc_errno == ENOTSUP) {
		/* This type of vdev cannot be detached from. */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
		    "applicable to mirror and replacing vdevs"));
		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
	} else if (ioc_errno == EBUSY) {
		/* The device has no other replicas. */
		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
	} else {
		(void) zpool_standard_error(hdl, ioc_errno, msg);
	}

	return (-1);
}

/*
 * Find a mirror vdev in the source nvlist.
 *
 * The mchild array contains a list of disks in one of the top-level mirrors
 * of the source pool. The schild array contains a list of disks that the
 * user specified on the command line. We loop over the mchild array to
 * see if any entry in the schild array matches.
 *
 * If a disk in the mchild array is found in the schild array, we return
 * the index of that entry. Otherwise we return -1.
*/ static int find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren, nvlist_t **schild, uint_t schildren) { uint_t mc; for (mc = 0; mc < mchildren; mc++) { uint_t sc; char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp, mchild[mc], 0); for (sc = 0; sc < schildren; sc++) { char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp, schild[sc], 0); boolean_t result = (strcmp(mpath, spath) == 0); free(spath); if (result) { free(mpath); return (mc); } } free(mpath); } return (-1); } /* * Split a mirror pool. If newroot points to null, then a new nvlist * is generated and it is the responsibility of the caller to free it. */ int zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, nvlist_t *props, splitflags_t flags) { zfs_cmd_t zc = {"\0"}; char msg[1024], *bias; nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL; nvlist_t **varray = NULL, *zc_props = NULL; uint_t c, children, newchildren, lastlog = 0, vcount, found = 0; libzfs_handle_t *hdl = zhp->zpool_hdl; uint64_t vers, readonly = B_FALSE; boolean_t freelist = B_FALSE, memory_err = B_TRUE; int retval = 0; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name); if (!zpool_name_valid(hdl, B_FALSE, newname)) return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); if ((config = zpool_get_config(zhp, NULL)) == NULL) { (void) fprintf(stderr, gettext("Internal error: unable to " "retrieve pool configuration\n")); return (-1); } verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0); if (props) { prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE }; if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name, props, vers, flags, msg)) == NULL) return (-1); (void) nvlist_lookup_uint64(zc_props, zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); if (readonly) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property %s can only be set at import time"), 
zpool_prop_to_name(ZPOOL_PROP_READONLY)); return (-1); } } if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Source pool is missing vdev tree")); nvlist_free(zc_props); return (-1); } varray = zfs_alloc(hdl, children * sizeof (nvlist_t *)); vcount = 0; if (*newroot == NULL || nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, &newchild, &newchildren) != 0) newchildren = 0; for (c = 0; c < children; c++) { uint64_t is_log = B_FALSE, is_hole = B_FALSE; boolean_t is_special = B_FALSE, is_dedup = B_FALSE; char *type; nvlist_t **mchild, *vdev; uint_t mchildren; int entry; /* * Unlike cache & spares, slogs are stored in the * ZPOOL_CONFIG_CHILDREN array. We filter them out here. */ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log); (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &is_hole); if (is_log || is_hole) { /* * Create a hole vdev and put it in the config. */ if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0) goto out; if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) != 0) goto out; if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE, 1) != 0) goto out; if (lastlog == 0) lastlog = vcount; varray[vcount++] = vdev; continue; } lastlog = 0; verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_INDIRECT) == 0) { vdev = child[c]; if (nvlist_dup(vdev, &varray[vcount++], 0) != 0) goto out; continue; } else if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Source pool must be composed only of mirrors\n")); retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); goto out; } if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) == 0) { if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0) is_special = B_TRUE; else if (strcmp(bias, VDEV_ALLOC_BIAS_DEDUP) == 0) is_dedup = B_TRUE; } verify(nvlist_lookup_nvlist_array(child[c], ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) 
== 0); /* find or add an entry for this top-level vdev */ if (newchildren > 0 && (entry = find_vdev_entry(zhp, mchild, mchildren, newchild, newchildren)) >= 0) { /* We found a disk that the user specified. */ vdev = mchild[entry]; ++found; } else { /* User didn't specify a disk for this vdev. */ vdev = mchild[mchildren - 1]; } if (nvlist_dup(vdev, &varray[vcount++], 0) != 0) goto out; if (flags.dryrun != 0) { if (is_dedup == B_TRUE) { if (nvlist_add_string(varray[vcount - 1], ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_DEDUP) != 0) goto out; } else if (is_special == B_TRUE) { if (nvlist_add_string(varray[vcount - 1], ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_SPECIAL) != 0) goto out; } } } /* did we find every disk the user specified? */ if (found != newchildren) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must " "include at most one disk from each mirror")); retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); goto out; } /* Prepare the nvlist for populating. */ if (*newroot == NULL) { if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0) goto out; freelist = B_TRUE; if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0) goto out; } else { verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0); } /* Add all the children we found */ if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray, lastlog == 0 ? vcount : lastlog) != 0) goto out; /* * If we're just doing a dry run, exit now with success. */ if (flags.dryrun) { memory_err = B_FALSE; freelist = B_FALSE; goto out; } /* now build up the config list & call the ioctl */ if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0) goto out; if (nvlist_add_nvlist(newconfig, ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 || nvlist_add_string(newconfig, ZPOOL_CONFIG_POOL_NAME, newname) != 0 || nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0) goto out; /* * The new pool is automatically part of the namespace unless we * explicitly export it. 
*/ if (!flags.import) zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string)); if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0) goto out; if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) goto out; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) { retval = zpool_standard_error(hdl, errno, msg); goto out; } freelist = B_FALSE; memory_err = B_FALSE; out: if (varray != NULL) { int v; for (v = 0; v < vcount; v++) nvlist_free(varray[v]); free(varray); } zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(newconfig); if (freelist) { nvlist_free(*newroot); *newroot = NULL; } if (retval != 0) return (retval); if (memory_err) return (no_memory(hdl)); return (0); } /* * Remove the given device. */ int zpool_vdev_remove(zpool_handle_t *zhp, const char *path) { zfs_cmd_t zc = {"\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; libzfs_handle_t *hdl = zhp->zpool_hdl; uint64_t version; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot remove %s"), path); if (zpool_is_draid_spare(path)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dRAID spares cannot be removed")); return (zfs_error(hdl, EZFS_NODEVICE, msg)); } (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, &islog)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, msg)); version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if (islog && version < SPA_VERSION_HOLES) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to support log removal")); return (zfs_error(hdl, EZFS_BADVERSION, msg)); } zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0) return (0); switch (errno) { case EINVAL: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid config; all top-level vdevs must " "have the same sector 
size and not be raidz.")); (void) zfs_error(hdl, EZFS_INVALCONFIG, msg); break; case EBUSY: if (islog) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Mount encrypted datasets to replay logs.")); } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Pool busy; removal may already be in progress")); } (void) zfs_error(hdl, EZFS_BUSY, msg); break; case EACCES: if (islog) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Mount encrypted datasets to replay logs.")); (void) zfs_error(hdl, EZFS_BUSY, msg); } else { (void) zpool_standard_error(hdl, errno, msg); } break; default: (void) zpool_standard_error(hdl, errno, msg); } return (-1); } int zpool_vdev_remove_cancel(zpool_handle_t *zhp) { zfs_cmd_t zc; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot cancel removal")); bzero(&zc, sizeof (zc)); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = 1; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } int zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path, uint64_t *sizep) { char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"), path); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, &islog)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, msg)); if (avail_spare || l2cache || islog) { *sizep = 0; return (0); } if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "indirect size not available")); return (zfs_error(hdl, EINVAL, msg)); } return (0); } /* * Clear the errors for the pool, or the particular device if specified. 
*/
int
zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	nvlist_t *tgt;
	zpool_load_policy_t policy;
	boolean_t avail_spare, l2cache;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	nvlist_t *nvi = NULL;
	int error;
	int ioctl_errno;

	/* The error prefix names the device if one was given, else the pool. */
	if (path)
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
		    path);
	else
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
		    zhp->zpool_name);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if (path) {
		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
		    &l2cache, NULL)) == NULL)
			return (zfs_error(hdl, EZFS_NODEVICE, msg));

		/*
		 * Don't allow error clearing for hot spares. Do allow
		 * error clearing for l2cache devices.
		 */
		if (avail_spare)
			return (zfs_error(hdl, EZFS_ISSPARE, msg));

		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
		    &zc.zc_guid) == 0);
	}

	zpool_get_load_policy(rewindnvl, &policy);
	zc.zc_cookie = policy.zlp_rewind;

	if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
		return (-1);

	if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0) {
		/* Release the destination buffer allocated just above. */
		zcmd_free_nvlists(&zc);
		return (-1);
	}

	/* Retry with a larger destination buffer while the ioctl says ENOMEM. */
	while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
	    errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (-1);
		}
	}

	/*
	 * Capture the ioctl's errno now; the cleanup calls below may free
	 * memory and are not guaranteed to leave errno untouched. The value
	 * is only consulted when 'error' is non-zero.
	 */
	ioctl_errno = errno;

	if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) &&
	    ioctl_errno != EPERM && ioctl_errno != EACCES)) {
		if (policy.zlp_rewind &
		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
			(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
			zpool_rewind_exclaim(hdl, zc.zc_name,
			    ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0),
			    nvi);
			nvlist_free(nvi);
		}
		zcmd_free_nvlists(&zc);
		return (0);
	}

	zcmd_free_nvlists(&zc);
	return (zpool_standard_error(hdl, ioctl_errno, msg));
}

/*
 * Similar to zpool_clear(), but takes a GUID (used by fmd).
*/ int zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid) { zfs_cmd_t zc = {"\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; zc.zc_cookie = ZPOOL_NO_REWIND; if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } /* * Change the GUID for a pool. */ int zpool_reguid(zpool_handle_t *zhp) { char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; zfs_cmd_t zc = {"\0"}; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } /* * Reopen the pool. */ int zpool_reopen_one(zpool_handle_t *zhp, void *data) { libzfs_handle_t *hdl = zpool_get_handle(zhp); const char *pool_name = zpool_get_name(zhp); boolean_t *scrub_restart = data; int error; error = lzc_reopen(pool_name, *scrub_restart); if (error) { return (zpool_standard_error_fmt(hdl, error, dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), pool_name)); } return (0); } /* call into libzfs_core to execute the sync IOCTL per pool */ int zpool_sync_one(zpool_handle_t *zhp, void *data) { int ret; libzfs_handle_t *hdl = zpool_get_handle(zhp); const char *pool_name = zpool_get_name(zhp); boolean_t *force = data; nvlist_t *innvl = fnvlist_alloc(); fnvlist_add_boolean_value(innvl, "force", *force); if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) { nvlist_free(innvl); return (zpool_standard_error_fmt(hdl, ret, dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name)); } nvlist_free(innvl); return (0); } #define PATH_BUF_LEN 64 /* * Given a vdev, return the name to display in iostat. 
If the vdev has a path, * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type. * We also check if this is a whole disk, in which case we strip off the * trailing 's0' slice name. * * This routine is also responsible for identifying when disks have been * reconfigured in a new location. The kernel will have opened the device by * devid, but the path will still refer to the old location. To catch this, we * first do a path -> devid translation (which is fast for the common case). If * the devid matches, we're done. If not, we do a reverse devid -> path * translation and issue the appropriate ioctl() to update the path of the vdev. * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any * of these checks. */ char * zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, int name_flags) { char *path, *type, *env; uint64_t value; char buf[PATH_BUF_LEN]; char tmpbuf[PATH_BUF_LEN]; /* * vdev_name will be "root"/"root-0" for the root vdev, but it is the * zpool name that will be displayed to the user. 
*/ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); if (zhp != NULL && strcmp(type, "root") == 0) return (zfs_strdup(hdl, zpool_get_name(zhp))); env = getenv("ZPOOL_VDEV_NAME_PATH"); if (env && (strtoul(env, NULL, 0) > 0 || !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) name_flags |= VDEV_NAME_PATH; env = getenv("ZPOOL_VDEV_NAME_GUID"); if (env && (strtoul(env, NULL, 0) > 0 || !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) name_flags |= VDEV_NAME_GUID; env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS"); if (env && (strtoul(env, NULL, 0) > 0 || !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) name_flags |= VDEV_NAME_FOLLOW_LINKS; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 || name_flags & VDEV_NAME_GUID) { (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value); (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value); path = buf; } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { if (name_flags & VDEV_NAME_FOLLOW_LINKS) { char *rp = realpath(path, NULL); if (rp) { strlcpy(buf, rp, sizeof (buf)); path = buf; free(rp); } } /* * For a block device only use the name. */ if ((strcmp(type, VDEV_TYPE_DISK) == 0) && !(name_flags & VDEV_NAME_PATH)) { path = zfs_strip_path(path); } /* * Remove the partition from the path if this is a whole disk. */ if (strcmp(type, VDEV_TYPE_DRAID_SPARE) != 0 && nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value) == 0 && value && !(name_flags & VDEV_NAME_PATH)) { return (zfs_strip_partition(path)); } } else { path = type; /* * If it's a raidz device, we need to stick in the parity level. */ if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) { verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &value) == 0); (void) snprintf(buf, sizeof (buf), "%s%llu", path, (u_longlong_t)value); path = buf; } /* * If it's a dRAID device, we add parity, groups, and spares. 
*/ if (strcmp(path, VDEV_TYPE_DRAID) == 0) { uint64_t ndata, nparity, nspares; nvlist_t **child; uint_t children; verify(nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &nparity) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, &ndata) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, &nspares) == 0); path = zpool_draid_name(buf, sizeof (buf), ndata, nparity, nspares, children); } /* * We identify each top-level vdev by using a * naming convention. */ if (name_flags & VDEV_NAME_TYPE_ID) { uint64_t id; verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &id) == 0); (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu", path, (u_longlong_t)id); path = tmpbuf; } } return (zfs_strdup(hdl, path)); } static int zbookmark_mem_compare(const void *a, const void *b) { return (memcmp(a, b, sizeof (zbookmark_phys_t))); } /* * Retrieve the persistent error log, uniquify the members, and return to the * caller. */ int zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; uint64_t count; zbookmark_phys_t *zb = NULL; int i; /* * Retrieve the raw error list from the kernel. If the number of errors * has increased, allocate more space and continue until we get the * entire list. 
*/ verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT, &count) == 0); if (count == 0) return (0); zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl, count * sizeof (zbookmark_phys_t)); zc.zc_nvlist_dst_size = count; (void) strcpy(zc.zc_name, zhp->zpool_name); for (;;) { if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_ERROR_LOG, &zc) != 0) { free((void *)(uintptr_t)zc.zc_nvlist_dst); if (errno == ENOMEM) { void *dst; count = zc.zc_nvlist_dst_size; dst = zfs_alloc(zhp->zpool_hdl, count * sizeof (zbookmark_phys_t)); zc.zc_nvlist_dst = (uintptr_t)dst; } else { return (zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "errors: List of " "errors unavailable"))); } } else { break; } } /* * Sort the resulting bookmarks. This is a little confusing due to the * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks * _not_ copied as part of the process. So we point the start of our * array appropriate and decrement the total number of elements. */ zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) + zc.zc_nvlist_dst_size; count -= zc.zc_nvlist_dst_size; qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare); verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); /* * Fill in the nverrlistp with nvlist's of dataset and object numbers. 
*/ for (i = 0; i < count; i++) { nvlist_t *nv; /* ignoring zb_blkid and zb_level for now */ if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset && zb[i-1].zb_object == zb[i].zb_object) continue; if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0) goto nomem; if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET, zb[i].zb_objset) != 0) { nvlist_free(nv); goto nomem; } if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT, zb[i].zb_object) != 0) { nvlist_free(nv); goto nomem; } if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) { nvlist_free(nv); goto nomem; } nvlist_free(nv); } free((void *)(uintptr_t)zc.zc_nvlist_dst); return (0); nomem: free((void *)(uintptr_t)zc.zc_nvlist_dst); return (no_memory(zhp->zpool_hdl)); } /* * Upgrade a ZFS pool to the latest on-disk version. */ int zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strcpy(zc.zc_name, zhp->zpool_name); zc.zc_cookie = new_version; if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0) return (zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"), zhp->zpool_name)); return (0); } void zfs_save_arguments(int argc, char **argv, char *string, int len) { int i; (void) strlcpy(string, basename(argv[0]), len); for (i = 1; i < argc; i++) { (void) strlcat(string, " ", len); (void) strlcat(string, argv[i], len); } } int zpool_log_history(libzfs_handle_t *hdl, const char *message) { zfs_cmd_t zc = {"\0"}; nvlist_t *args; int err; args = fnvlist_alloc(); fnvlist_add_string(args, "message", message); err = zcmd_write_src_nvlist(hdl, &zc, args); if (err == 0) err = zfs_ioctl(hdl, ZFS_IOC_LOG_HISTORY, &zc); nvlist_free(args); zcmd_free_nvlists(&zc); return (err); } /* * Perform ioctl to get some command history of a pool. * * 'buf' is the buffer to fill up to 'len' bytes. 'off' is the * logical offset of the history buffer to start reading from. 
* * Upon return, 'off' is the next logical offset to read from and * 'len' is the actual amount of bytes read into 'buf'. */ static int get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_history = (uint64_t)(uintptr_t)buf; zc.zc_history_len = *len; zc.zc_history_offset = *off; if (zfs_ioctl(hdl, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) { switch (errno) { case EPERM: return (zfs_error_fmt(hdl, EZFS_PERM, dgettext(TEXT_DOMAIN, "cannot show history for pool '%s'"), zhp->zpool_name)); case ENOENT: return (zfs_error_fmt(hdl, EZFS_NOHISTORY, dgettext(TEXT_DOMAIN, "cannot get history for pool " "'%s'"), zhp->zpool_name)); case ENOTSUP: return (zfs_error_fmt(hdl, EZFS_BADVERSION, dgettext(TEXT_DOMAIN, "cannot get history for pool " "'%s', pool must be upgraded"), zhp->zpool_name)); default: return (zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get history for '%s'"), zhp->zpool_name)); } } *len = zc.zc_history_len; *off = zc.zc_history_offset; return (0); } /* * Retrieve the command history of a pool. */ int zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp, uint64_t *off, boolean_t *eof) { char *buf; int buflen = 128 * 1024; nvlist_t **records = NULL; uint_t numrecords = 0; int err, i; uint64_t start = *off; buf = malloc(buflen); if (buf == NULL) return (ENOMEM); /* process about 1MB a time */ while (*off - start < 1024 * 1024) { uint64_t bytes_read = buflen; uint64_t leftover; if ((err = get_history(zhp, buf, off, &bytes_read)) != 0) break; /* if nothing else was read in, we're at EOF, just return */ if (!bytes_read) { *eof = B_TRUE; break; } if ((err = zpool_history_unpack(buf, bytes_read, &leftover, &records, &numrecords)) != 0) break; *off -= leftover; if (leftover == bytes_read) { /* * no progress made, because buffer is not big enough * to hold this record; resize and retry. 
*/ buflen *= 2; free(buf); buf = malloc(buflen); if (buf == NULL) return (ENOMEM); } } free(buf); if (!err) { verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0); verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD, records, numrecords) == 0); } for (i = 0; i < numrecords; i++) nvlist_free(records[i]); free(records); return (err); } /* * Retrieve the next event given the passed 'zevent_fd' file descriptor. * If there is a new event available 'nvp' will contain a newly allocated * nvlist and 'dropped' will be set to the number of missed events since * the last call to this function. When 'nvp' is set to NULL it indicates * no new events are available. In either case the function returns 0 and * it is up to the caller to free 'nvp'. In the case of a fatal error the * function will return a non-zero value. When the function is called in * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed), * it will not return until a new event is available. */ int zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, int *dropped, unsigned flags, int zevent_fd) { zfs_cmd_t zc = {"\0"}; int error = 0; *nvp = NULL; *dropped = 0; zc.zc_cleanup_fd = zevent_fd; if (flags & ZEVENT_NONBLOCK) zc.zc_guid = ZEVENT_NONBLOCK; if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0) return (-1); retry: if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) { switch (errno) { case ESHUTDOWN: error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL, dgettext(TEXT_DOMAIN, "zfs shutdown")); goto out; case ENOENT: /* Blocking error case should not occur */ if (!(flags & ZEVENT_NONBLOCK)) error = zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get event")); goto out; case ENOMEM: if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { error = zfs_error_fmt(hdl, EZFS_NOMEM, dgettext(TEXT_DOMAIN, "cannot get event")); goto out; } else { goto retry; } default: error = zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get event")); goto out; } } error = 
zcmd_read_dst_nvlist(hdl, &zc, nvp); if (error != 0) goto out; *dropped = (int)zc.zc_cookie; out: zcmd_free_nvlists(&zc); return (error); } /* * Clear all events. */ int zpool_events_clear(libzfs_handle_t *hdl, int *count) { zfs_cmd_t zc = {"\0"}; if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0) return (zpool_standard_error(hdl, errno, dgettext(TEXT_DOMAIN, "cannot clear events"))); if (count != NULL) *count = (int)zc.zc_cookie; /* # of events cleared */ return (0); } /* * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for * the passed zevent_fd file handle. On success zero is returned, * otherwise -1 is returned and hdl->libzfs_error is set to the errno. */ int zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd) { zfs_cmd_t zc = {"\0"}; int error = 0; zc.zc_guid = eid; zc.zc_cleanup_fd = zevent_fd; if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) { switch (errno) { case ENOENT: error = zfs_error_fmt(hdl, EZFS_NOENT, dgettext(TEXT_DOMAIN, "cannot get event")); break; case ENOMEM: error = zfs_error_fmt(hdl, EZFS_NOMEM, dgettext(TEXT_DOMAIN, "cannot get event")); break; default: error = zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get event")); break; } } return (error); } static void zpool_obj_to_path_impl(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, char *pathname, size_t len, boolean_t always_unmounted) { zfs_cmd_t zc = {"\0"}; boolean_t mounted = B_FALSE; char *mntpnt = NULL; char dsname[ZFS_MAX_DATASET_NAME_LEN]; if (dsobj == 0) { /* special case for the MOS */ (void) snprintf(pathname, len, ":<0x%llx>", (longlong_t)obj); return; } /* get the dataset's name */ (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_obj = dsobj; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) { /* just write out a path of two object numbers */ (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>", (longlong_t)dsobj, (longlong_t)obj); return; } (void) strlcpy(dsname, zc.zc_value, sizeof 
(dsname)); /* find out if the dataset is mounted */ mounted = !always_unmounted && is_mounted(zhp->zpool_hdl, dsname, &mntpnt); /* get the corrupted object's path */ (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); zc.zc_obj = obj; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_OBJ_TO_PATH, &zc) == 0) { if (mounted) { (void) snprintf(pathname, len, "%s%s", mntpnt, zc.zc_value); } else { (void) snprintf(pathname, len, "%s:%s", dsname, zc.zc_value); } } else { (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, (longlong_t)obj); } free(mntpnt); } void zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, char *pathname, size_t len) { zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len, B_FALSE); } void zpool_obj_to_path_ds(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, char *pathname, size_t len) { zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len, B_TRUE); } /* * Wait while the specified activity is in progress in the pool. */ int zpool_wait(zpool_handle_t *zhp, zpool_wait_activity_t activity) { boolean_t missing; int error = zpool_wait_status(zhp, activity, &missing, NULL); if (missing) { (void) zpool_standard_error_fmt(zhp->zpool_hdl, ENOENT, dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"), zhp->zpool_name); return (ENOENT); } else { return (error); } } /* * Wait for the given activity and return the status of the wait (whether or not * any waiting was done) in the 'waited' parameter. Non-existent pools are * reported via the 'missing' parameter, rather than by printing an error * message. This is convenient when this function is called in a loop over a * long period of time (as it is, for example, by zpool's wait cmd). In that * scenario, a pool being exported or destroyed should be considered a normal * event, so we don't want to print an error when we find that the pool doesn't * exist. 
*/ int zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity, boolean_t *missing, boolean_t *waited) { int error = lzc_wait(zhp->zpool_name, activity, waited); *missing = (error == ENOENT); if (*missing) return (0); if (error != 0) { (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"), zhp->zpool_name); } return (error); } int zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap) { int error = lzc_set_bootenv(zhp->zpool_name, envmap); if (error != 0) { (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, dgettext(TEXT_DOMAIN, "error setting bootenv in pool '%s'"), zhp->zpool_name); } return (error); } int zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp) { nvlist_t *nvl; int error; nvl = NULL; error = lzc_get_bootenv(zhp->zpool_name, &nvl); if (error != 0) { (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, dgettext(TEXT_DOMAIN, "error getting bootenv in pool '%s'"), zhp->zpool_name); } else { *nvlp = nvl; } return (error); } /* * Attempt to read and parse feature file(s) (from "compatibility" property). * Files contain zpool feature names, comma or whitespace-separated. * Comments (# character to next newline) are discarded. * * Arguments: * compatibility : string containing feature filenames * features : either NULL or pointer to array of boolean * report : either NULL or pointer to string buffer * rlen : length of "report" buffer * * compatibility is NULL (unset), "", "off", "legacy", or list of * comma-separated filenames. filenames should either be absolute, * or relative to: * 1) ZPOOL_SYSCONF_COMPAT_D (eg: /etc/zfs/compatibility.d) or * 2) ZPOOL_DATA_COMPAT_D (eg: /usr/share/zfs/compatibility.d). * (Unset), "" or "off" => enable all features * "legacy" => disable all features * * Any feature names read from files which match unames in spa_feature_table * will have the corresponding boolean set in the features array (if non-NULL). 
* If more than one feature set specified, only features present in *all* of * them will be set. * * "report" if not NULL will be populated with a suitable status message. * * Return values: * ZPOOL_COMPATIBILITY_OK : files read and parsed ok * ZPOOL_COMPATIBILITY_BADFILE : file too big or not a text file * ZPOOL_COMPATIBILITY_BADTOKEN : SYSCONF file contains invalid feature name * ZPOOL_COMPATIBILITY_WARNTOKEN : DATA file contains invalid feature name * ZPOOL_COMPATIBILITY_NOFILES : no feature files found */ zpool_compat_status_t zpool_load_compat(const char *compat, boolean_t *features, char *report, size_t rlen) { int sdirfd, ddirfd, featfd; struct stat fs; char *fc; char *ps, *ls, *ws; char *file, *line, *word; char l_compat[ZFS_MAXPROPLEN]; boolean_t ret_nofiles = B_TRUE; boolean_t ret_badfile = B_FALSE; boolean_t ret_badtoken = B_FALSE; boolean_t ret_warntoken = B_FALSE; /* special cases (unset), "" and "off" => enable all features */ if (compat == NULL || compat[0] == '\0' || strcmp(compat, ZPOOL_COMPAT_OFF) == 0) { if (features != NULL) for (uint_t i = 0; i < SPA_FEATURES; i++) features[i] = B_TRUE; if (report != NULL) strlcpy(report, gettext("all features enabled"), rlen); return (ZPOOL_COMPATIBILITY_OK); } /* Final special case "legacy" => disable all features */ if (strcmp(compat, ZPOOL_COMPAT_LEGACY) == 0) { if (features != NULL) for (uint_t i = 0; i < SPA_FEATURES; i++) features[i] = B_FALSE; if (report != NULL) strlcpy(report, gettext("all features disabled"), rlen); return (ZPOOL_COMPATIBILITY_OK); } /* * Start with all true; will be ANDed with results from each file */ if (features != NULL) for (uint_t i = 0; i < SPA_FEATURES; i++) features[i] = B_TRUE; char err_badfile[ZFS_MAXPROPLEN] = ""; char err_badtoken[ZFS_MAXPROPLEN] = ""; /* * We ignore errors from the directory open() * as they're only needed if the filename is relative * which will be checked during the openat(). 
*/ /* O_PATH safer than O_RDONLY if system allows it */ #if defined(O_PATH) #define ZC_DIR_FLAGS (O_DIRECTORY | O_CLOEXEC | O_PATH) #else #define ZC_DIR_FLAGS (O_DIRECTORY | O_CLOEXEC | O_RDONLY) #endif sdirfd = open(ZPOOL_SYSCONF_COMPAT_D, ZC_DIR_FLAGS); ddirfd = open(ZPOOL_DATA_COMPAT_D, ZC_DIR_FLAGS); (void) strlcpy(l_compat, compat, ZFS_MAXPROPLEN); for (file = strtok_r(l_compat, ",", &ps); file != NULL; file = strtok_r(NULL, ",", &ps)) { boolean_t l_features[SPA_FEATURES]; enum { Z_SYSCONF, Z_DATA } source; /* try sysconfdir first, then datadir */ source = Z_SYSCONF; if ((featfd = openat(sdirfd, file, O_RDONLY | O_CLOEXEC)) < 0) { featfd = openat(ddirfd, file, O_RDONLY | O_CLOEXEC); source = Z_DATA; } /* File readable and correct size? */ if (featfd < 0 || fstat(featfd, &fs) < 0 || fs.st_size < 1 || fs.st_size > ZPOOL_COMPAT_MAXSIZE) { (void) close(featfd); strlcat(err_badfile, file, ZFS_MAXPROPLEN); strlcat(err_badfile, " ", ZFS_MAXPROPLEN); ret_badfile = B_TRUE; continue; } /* Prefault the file if system allows */ #if defined(MAP_POPULATE) #define ZC_MMAP_FLAGS (MAP_PRIVATE | MAP_POPULATE) #elif defined(MAP_PREFAULT_READ) #define ZC_MMAP_FLAGS (MAP_PRIVATE | MAP_PREFAULT_READ) #else #define ZC_MMAP_FLAGS (MAP_PRIVATE) #endif /* private mmap() so we can strtok safely */ fc = (char *)mmap(NULL, fs.st_size, PROT_READ | PROT_WRITE, ZC_MMAP_FLAGS, featfd, 0); (void) close(featfd); /* map ok, and last character == newline? 
*/ if (fc == MAP_FAILED || fc[fs.st_size - 1] != '\n') { (void) munmap((void *) fc, fs.st_size); strlcat(err_badfile, file, ZFS_MAXPROPLEN); strlcat(err_badfile, " ", ZFS_MAXPROPLEN); ret_badfile = B_TRUE; continue; } ret_nofiles = B_FALSE; for (uint_t i = 0; i < SPA_FEATURES; i++) l_features[i] = B_FALSE; /* replace final newline with NULL to ensure string ends */ fc[fs.st_size - 1] = '\0'; for (line = strtok_r(fc, "\n", &ls); line != NULL; line = strtok_r(NULL, "\n", &ls)) { /* discard comments */ char *r = strchr(line, '#'); if (r != NULL) *r = '\0'; for (word = strtok_r(line, ", \t", &ws); word != NULL; word = strtok_r(NULL, ", \t", &ws)) { /* Find matching feature name */ uint_t f; for (f = 0; f < SPA_FEATURES; f++) { zfeature_info_t *fi = &spa_feature_table[f]; if (strcmp(word, fi->fi_uname) == 0) { l_features[f] = B_TRUE; break; } } if (f < SPA_FEATURES) continue; /* found an unrecognized word */ /* lightly sanitize it */ if (strlen(word) > 32) word[32] = '\0'; for (char *c = word; *c != '\0'; c++) if (!isprint(*c)) *c = '?'; strlcat(err_badtoken, word, ZFS_MAXPROPLEN); strlcat(err_badtoken, " ", ZFS_MAXPROPLEN); if (source == Z_SYSCONF) ret_badtoken = B_TRUE; else ret_warntoken = B_TRUE; } } (void) munmap((void *) fc, fs.st_size); if (features != NULL) for (uint_t i = 0; i < SPA_FEATURES; i++) features[i] &= l_features[i]; } (void) close(sdirfd); (void) close(ddirfd); /* Return the most serious error */ if (ret_badfile) { if (report != NULL) snprintf(report, rlen, gettext("could not read/" "parse feature file(s): %s"), err_badfile); return (ZPOOL_COMPATIBILITY_BADFILE); } if (ret_nofiles) { if (report != NULL) strlcpy(report, gettext("no valid compatibility files specified"), rlen); return (ZPOOL_COMPATIBILITY_NOFILES); } if (ret_badtoken) { if (report != NULL) snprintf(report, rlen, gettext("invalid feature " "name(s) in local compatibility files: %s"), err_badtoken); return (ZPOOL_COMPATIBILITY_BADTOKEN); } if (ret_warntoken) { if (report != NULL) 
snprintf(report, rlen, gettext("unrecognized feature " "name(s) in distribution compatibility files: %s"), err_badtoken); return (ZPOOL_COMPATIBILITY_WARNTOKEN); } if (report != NULL) strlcpy(report, gettext("compatibility set ok"), rlen); return (ZPOOL_COMPATIBILITY_OK); } diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c index 908025f9a18d..ebf20956a213 100644 --- a/lib/libzutil/os/linux/zutil_import_os.c +++ b/lib/libzutil/os/linux/zutil_import_os.c @@ -1,916 +1,936 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2015 RackTop Systems. * Copyright (c) 2016, Intel Corporation. */ /* * Pool import support functions. * * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since * these commands are expected to run in the global zone, we can assume * that the devices are all readable when called. * * To import a pool, we rely on reading the configuration information from the * ZFS label of each device. 
If we successfully read the label, then we * organize the configuration information in the following hierarchy: * * pool guid -> toplevel vdev guid -> label txg * * Duplicate entries matching this same tuple will be discarded. Once we have * examined every device, we pick the best label txg config for each toplevel * vdev. We then arrange these toplevel vdevs into a complete pool config, and * update any paths that have changed. Finally, we attempt to import the pool * using our derived config, and record the results. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zutil_import.h" #ifdef HAVE_LIBUDEV #include #include #endif #include #define DEFAULT_IMPORT_PATH_SIZE 9 #define DEV_BYID_PATH "/dev/disk/by-id/" static boolean_t is_watchdog_dev(char *dev) { /* For 'watchdog' dev */ if (strcmp(dev, "watchdog") == 0) return (B_TRUE); /* For 'watchdog' immediately followed by a digit (e.g. 'watchdog0') */ if (strstr(dev, "watchdog") == dev && isdigit(dev[8])) return (B_TRUE); return (B_FALSE); } int zfs_dev_flush(int fd) { return (ioctl(fd, BLKFLSBUF)); } void zpool_open_func(void *arg) { rdsk_node_t *rn = arg; libpc_handle_t *hdl = rn->rn_hdl; struct stat64 statbuf; nvlist_t *config; char *bname, *dupname; uint64_t vdev_guid = 0; int error; int num_labels = 0; int fd; /* * Skip devices with well known prefixes there can be side effects * when opening devices which need to be avoided. * * hpet - High Precision Event Timer * watchdog - Watchdog must be closed in a special way. */ dupname = zutil_strdup(hdl, rn->rn_name); bname = basename(dupname); error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname)); free(dupname); if (error) return; /* * Ignore failed stats. We only want regular files and block devices. 
*/ if (stat64(rn->rn_name, &statbuf) != 0 || (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode))) return; /* * Preferentially open using O_DIRECT to bypass the block device * cache which may be stale for multipath devices. An EINVAL errno * indicates O_DIRECT is unsupported so fallback to just O_RDONLY. */ fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC); if ((fd < 0) && (errno == EINVAL)) fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC); if ((fd < 0) && (errno == EACCES)) hdl->lpc_open_access_error = B_TRUE; if (fd < 0) return; /* * This file is too small to hold a zpool */ if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) { (void) close(fd); return; } error = zpool_read_label(fd, &config, &num_labels); if (error != 0) { (void) close(fd); return; } if (num_labels == 0) { (void) close(fd); nvlist_free(config); return; } /* * Check that the vdev is for the expected guid. Additional entries * are speculatively added based on the paths stored in the labels. * Entries with valid paths but incorrect guids must be removed. */ error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) { (void) close(fd); nvlist_free(config); return; } (void) close(fd); rn->rn_config = config; rn->rn_num_labels = num_labels; /* * Add additional entries for paths described by this label. */ if (rn->rn_labelpaths) { char *path = NULL; char *devid = NULL; - char *env = NULL; rdsk_node_t *slice; avl_index_t where; - int timeout; int error; if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) return; - env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); - if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || - timeout < 0) { - timeout = DISK_LABEL_WAIT; - } - /* * Allow devlinks to stabilize so all paths are available. 
*/ - zpool_label_disk_wait(rn->rn_name, timeout); + zpool_disk_wait(rn->rn_name); if (path != NULL) { slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); slice->rn_name = zutil_strdup(hdl, path); slice->rn_vdev_guid = vdev_guid; slice->rn_avl = rn->rn_avl; slice->rn_hdl = hdl; slice->rn_order = IMPORT_ORDER_PREFERRED_1; slice->rn_labelpaths = B_FALSE; pthread_mutex_lock(rn->rn_lock); if (avl_find(rn->rn_avl, slice, &where)) { pthread_mutex_unlock(rn->rn_lock); free(slice->rn_name); free(slice); } else { avl_insert(rn->rn_avl, slice, where); pthread_mutex_unlock(rn->rn_lock); zpool_open_func(slice); } } if (devid != NULL) { slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); error = asprintf(&slice->rn_name, "%s%s", DEV_BYID_PATH, devid); if (error == -1) { free(slice); return; } slice->rn_vdev_guid = vdev_guid; slice->rn_avl = rn->rn_avl; slice->rn_hdl = hdl; slice->rn_order = IMPORT_ORDER_PREFERRED_2; slice->rn_labelpaths = B_FALSE; pthread_mutex_lock(rn->rn_lock); if (avl_find(rn->rn_avl, slice, &where)) { pthread_mutex_unlock(rn->rn_lock); free(slice->rn_name); free(slice); } else { avl_insert(rn->rn_avl, slice, where); pthread_mutex_unlock(rn->rn_lock); zpool_open_func(slice); } } } } static char * zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { "/dev/disk/by-vdev", /* Custom rules, use first if they exist */ "/dev/mapper", /* Use multipath devices before components */ "/dev/disk/by-partlabel", /* Single unique entry set by user */ "/dev/disk/by-partuuid", /* Generated partition uuid */ "/dev/disk/by-label", /* Custom persistent labels */ "/dev/disk/by-uuid", /* Single unique entry and persistent */ "/dev/disk/by-id", /* May be multiple entries and persistent */ "/dev/disk/by-path", /* Encodes physical location and persistent */ "/dev" /* UNSAFE device names will change */ }; const char * const * zpool_default_search_paths(size_t *count) { *count = DEFAULT_IMPORT_PATH_SIZE; return ((const char * const *)zpool_default_import_path); } /* * Given a full path to a 
device determine if that device appears in the * import search path. If it does return the first match and store the * index in the passed 'order' variable, otherwise return an error. */ static int zfs_path_order(char *name, int *order) { int i = 0, error = ENOENT; char *dir, *env, *envdup; env = getenv("ZPOOL_IMPORT_PATH"); if (env) { envdup = strdup(env); dir = strtok(envdup, ":"); while (dir) { if (strncmp(name, dir, strlen(dir)) == 0) { *order = i; error = 0; break; } dir = strtok(NULL, ":"); i++; } free(envdup); } else { for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) { if (strncmp(name, zpool_default_import_path[i], strlen(zpool_default_import_path[i])) == 0) { *order = i; error = 0; break; } } } return (error); } /* * Use libblkid to quickly enumerate all known zfs devices. */ int zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock, avl_tree_t **slice_cache) { rdsk_node_t *slice; blkid_cache cache; blkid_dev_iterate iter; blkid_dev dev; avl_index_t where; int error; *slice_cache = NULL; error = blkid_get_cache(&cache, NULL); if (error != 0) return (error); error = blkid_probe_all_new(cache); if (error != 0) { blkid_put_cache(cache); return (error); } iter = blkid_dev_iterate_begin(cache); if (iter == NULL) { blkid_put_cache(cache); return (EINVAL); } error = blkid_dev_set_search(iter, "TYPE", "zfs_member"); if (error != 0) { blkid_dev_iterate_end(iter); blkid_put_cache(cache); return (error); } *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t)); avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); while (blkid_dev_next(iter, &dev) == 0) { slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev)); slice->rn_vdev_guid = 0; slice->rn_lock = lock; slice->rn_avl = *slice_cache; slice->rn_hdl = hdl; slice->rn_labelpaths = B_TRUE; error = zfs_path_order(slice->rn_name, &slice->rn_order); if (error == 0) slice->rn_order += IMPORT_ORDER_SCAN_OFFSET; else 
slice->rn_order = IMPORT_ORDER_DEFAULT; pthread_mutex_lock(lock); if (avl_find(*slice_cache, slice, &where)) { free(slice->rn_name); free(slice); } else { avl_insert(*slice_cache, slice, where); } pthread_mutex_unlock(lock); } blkid_dev_iterate_end(iter); blkid_put_cache(cache); return (0); } /* * Linux persistent device strings for vdev labels * * based on libudev for consistency with libudev disk add/remove events */ typedef struct vdev_dev_strs { char vds_devid[128]; char vds_devphys[128]; } vdev_dev_strs_t; #ifdef HAVE_LIBUDEV /* * Obtain the persistent device id string (describes what) * * used by ZED vdev matching for auto-{online,expand,replace} */ int zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) { struct udev_list_entry *entry; const char *bus; char devbyid[MAXPATHLEN]; /* The bus based by-id path is preferred */ bus = udev_device_get_property_value(dev, "ID_BUS"); if (bus == NULL) { const char *dm_uuid; /* * For multipath nodes use the persistent uuid based identifier * * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f */ dm_uuid = udev_device_get_property_value(dev, "DM_UUID"); if (dm_uuid != NULL) { (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid); return (0); } /* * For volumes use the persistent /dev/zvol/dataset identifier */ entry = udev_device_get_devlinks_list_entry(dev); while (entry != NULL) { const char *name; name = udev_list_entry_get_name(entry); if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { (void) strlcpy(bufptr, name, buflen); return (0); } entry = udev_list_entry_get_next(entry); } /* * NVME 'by-id' symlinks are similar to bus case */ struct udev_device *parent; parent = udev_device_get_parent_with_subsystem_devtype(dev, "nvme", NULL); if (parent != NULL) bus = "nvme"; /* continue with bus symlink search */ else return (ENODATA); } /* * locate the bus specific by-id link */ (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus); entry = 
udev_device_get_devlinks_list_entry(dev); while (entry != NULL) { const char *name; name = udev_list_entry_get_name(entry); if (strncmp(name, devbyid, strlen(devbyid)) == 0) { name += strlen(DEV_BYID_PATH); (void) strlcpy(bufptr, name, buflen); return (0); } entry = udev_list_entry_get_next(entry); } return (ENODATA); } /* * Obtain the persistent physical location string (describes where) * * used by ZED vdev matching for auto-{online,expand,replace} */ int zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) { const char *physpath = NULL; struct udev_list_entry *entry; /* * Normal disks use ID_PATH for their physical path. */ physpath = udev_device_get_property_value(dev, "ID_PATH"); if (physpath != NULL && strlen(physpath) > 0) { (void) strlcpy(bufptr, physpath, buflen); return (0); } /* * Device mapper devices are virtual and don't have a physical * path. For them we use ID_VDEV instead, which is setup via the * /etc/vdev_id.conf file. ID_VDEV provides a persistent path * to a virtual device. If you don't have vdev_id.conf setup, * you cannot use multipath autoreplace with device mapper. */ physpath = udev_device_get_property_value(dev, "ID_VDEV"); if (physpath != NULL && strlen(physpath) > 0) { (void) strlcpy(bufptr, physpath, buflen); return (0); } /* * For ZFS volumes use the persistent /dev/zvol/dataset identifier */ entry = udev_device_get_devlinks_list_entry(dev); while (entry != NULL) { physpath = udev_list_entry_get_name(entry); if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { (void) strlcpy(bufptr, physpath, buflen); return (0); } entry = udev_list_entry_get_next(entry); } /* * For all other devices fallback to using the by-uuid name. 
*/ entry = udev_device_get_devlinks_list_entry(dev); while (entry != NULL) { physpath = udev_list_entry_get_name(entry); if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) { (void) strlcpy(bufptr, physpath, buflen); return (0); } entry = udev_list_entry_get_next(entry); } return (ENODATA); } /* * A disk is considered a multipath whole disk when: * DEVNAME key value has "dm-" * DM_NAME key value has "mpath" prefix * DM_UUID key exists * ID_PART_TABLE_TYPE key does not exist or is not gpt */ static boolean_t udev_mpath_whole_disk(struct udev_device *dev) { const char *devname, *type, *uuid; devname = udev_device_get_property_value(dev, "DEVNAME"); type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE"); uuid = udev_device_get_property_value(dev, "DM_UUID"); if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) && ((type == NULL) || (strcmp(type, "gpt") != 0)) && (uuid != NULL)) { return (B_TRUE); } return (B_FALSE); } static int udev_device_is_ready(struct udev_device *dev) { #ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED return (udev_device_get_is_initialized(dev)); #else /* wait for DEVLINKS property to be initialized */ return (udev_device_get_property_value(dev, "DEVLINKS") != NULL); #endif } #else /* ARGSUSED */ int zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) { return (ENODATA); } /* ARGSUSED */ int zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) { return (ENODATA); } #endif /* HAVE_LIBUDEV */ /* * Wait up to timeout_ms for udev to set up the device node. The device is * considered ready when libudev determines it has been initialized, all of * the device links have been verified to exist, and it has been allowed to * settle. At this point the device can be accessed reliably. * Depending on the complexity of the udev rules this process could take * several seconds. 
*/ int zpool_label_disk_wait(const char *path, int timeout_ms) { #ifdef HAVE_LIBUDEV struct udev *udev; struct udev_device *dev = NULL; char nodepath[MAXPATHLEN]; char *sysname = NULL; int ret = ENODEV; int settle_ms = 50; long sleep_ms = 10; hrtime_t start, settle; if ((udev = udev_new()) == NULL) return (ENXIO); start = gethrtime(); settle = 0; do { if (sysname == NULL) { if (realpath(path, nodepath) != NULL) { sysname = strrchr(nodepath, '/') + 1; } else { (void) usleep(sleep_ms * MILLISEC); continue; } } dev = udev_device_new_from_subsystem_sysname(udev, "block", sysname); if ((dev != NULL) && udev_device_is_ready(dev)) { struct udev_list_entry *links, *link = NULL; ret = 0; links = udev_device_get_devlinks_list_entry(dev); udev_list_entry_foreach(link, links) { struct stat64 statbuf; const char *name; name = udev_list_entry_get_name(link); errno = 0; if (stat64(name, &statbuf) == 0 && errno == 0) continue; settle = 0; ret = ENODEV; break; } if (ret == 0) { if (settle == 0) { settle = gethrtime(); } else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms) { udev_device_unref(dev); break; } } } udev_device_unref(dev); (void) usleep(sleep_ms * MILLISEC); } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); udev_unref(udev); return (ret); #else int settle_ms = 50; long sleep_ms = 10; hrtime_t start, settle; struct stat64 statbuf; start = gethrtime(); settle = 0; do { errno = 0; if ((stat64(path, &statbuf) == 0) && (errno == 0)) { if (settle == 0) settle = gethrtime(); else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms) return (0); } else if (errno != ENOENT) { return (errno); } usleep(sleep_ms * MILLISEC); } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); return (ENODEV); #endif /* HAVE_LIBUDEV */ } +/* + * Simplified version of zpool_label_disk_wait() where we wait for a device + * to appear using the default timeouts. 
+ */
+int
+zpool_disk_wait(const char *path)
+{
+	int timeout;
+	timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS",
+	    DISK_LABEL_WAIT);
+
+	return (zpool_label_disk_wait(path, timeout));
+}
+
 /*
  * Encode the persistent devices strings
  * used for the vdev disk label
+ *
+ * path:      device path; resolved with realpath() to find the sysname
+ * ds:        output buffers for the devid and physical-location strings
+ * wholedisk: whole-disk flag (callers pass ZPOOL_CONFIG_WHOLE_DISK)
+ *
+ * Returns 0 with ds->vds_devid filled in (ds->vds_devphys is set to an
+ * empty string when no physical path is available), ENXIO if no udev
+ * context can be created, ENODEV if the device cannot be resolved, does
+ * not become ready, or is neither a whole disk nor a multipath whole
+ * disk, or the error from zfs_device_get_devid().  Built without
+ * libudev it always returns ENOENT.
  */
 static int
 encode_device_strings(const char *path, vdev_dev_strs_t *ds,
     boolean_t wholedisk)
 {
 #ifdef HAVE_LIBUDEV
 	struct udev *udev;
 	struct udev_device *dev = NULL;
 	char nodepath[MAXPATHLEN];
 	char *sysname;
 	int ret = ENODEV;
 	hrtime_t start;
 
 	if ((udev = udev_new()) == NULL)
 		return (ENXIO);
 
 	/* resolve path to a runtime device node instance */
 	if (realpath(path, nodepath) == NULL)
 		goto no_dev;
 
 	sysname = strrchr(nodepath, '/') + 1;
 
 	/*
 	 * Wait up to 3 seconds for udev to set up the device node context
 	 */
 	start = gethrtime();
 	do {
 		dev = udev_device_new_from_subsystem_sysname(udev, "block",
 		    sysname);
 		if (dev == NULL)
 			goto no_dev;
 		if (udev_device_is_ready(dev))
 			break;  /* udev ready */
 
 		udev_device_unref(dev);
 		dev = NULL;
 
 		if (NSEC2MSEC(gethrtime() - start) < 10)
 			(void) sched_yield();	/* yield/busy wait up to 10ms */
 		else
 			(void) usleep(10 * MILLISEC);
 
 	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
 
 	if (dev == NULL)
 		goto no_dev;
 
 	/*
 	 * Only whole disks require extra device strings
+	 *
+	 * dev still holds a reference here, so leave through no_dev_ref to
+	 * release it; ret is still ENODEV at this point.
 	 */
 	if (!wholedisk && !udev_mpath_whole_disk(dev))
-		goto no_dev;
+		goto no_dev_ref;
 
 	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
 	if (ret != 0)
 		goto no_dev_ref;
 
 	/* physical location string (optional) */
 	if (zfs_device_get_physical(dev, ds->vds_devphys,
 	    sizeof (ds->vds_devphys)) != 0) {
 		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
 	}
 
 no_dev_ref:
 	udev_device_unref(dev);
 no_dev:
 	udev_unref(udev);
 	return (ret);
 #else
 	return (ENOENT);
 #endif
 }
 
 /*
  * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
  * in the nvlist * (if applicable). 
Like: * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' * + * If an old path was in the nvlist, and the rescan can not find a new path, + * then keep the old path, since the disk may have been removed. + * + * path: The vdev path (value from ZPOOL_CONFIG_PATH) * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH) */ void update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, const char *key) { char *upath, *spath; + char *oldpath = NULL; + + (void) nvlist_lookup_string(nv, key, &oldpath); /* Add enclosure sysfs path (if disk is in an enclosure). */ upath = zfs_get_underlying_path(path); spath = zfs_get_enclosure_sysfs_path(upath); if (spath) { (void) nvlist_add_string(nv, key, spath); } else { - (void) nvlist_remove_all(nv, key); + /* + * We couldn't dynamically scan the disk's enclosure sysfs path. + * This could be because the disk went away. If there's an old + * enclosure sysfs path in the nvlist, then keep using it. + */ + if (!oldpath) { + (void) nvlist_remove_all(nv, key); + } } free(upath); free(spath); } /* * This will get called for each leaf vdev. */ static int sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data) { char *path = NULL; if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) return (1); /* Rescan our enclosure sysfs path for this vdev */ update_vdev_config_dev_sysfs_path(nv, path, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); return (0); } /* * Given an nvlist for our pool (with vdev tree), iterate over all the * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH. 
*/ void update_vdevs_config_dev_sysfs_path(nvlist_t *config) { nvlist_t *nvroot = NULL; verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL); } /* * Update a leaf vdev's persistent device strings * * - only applies for a dedicated leaf vdev (aka whole disk) * - updated during pool create|add|attach|import * - used for device matching during auto-{online,expand,replace} * - stored in a leaf disk config label (i.e. alongside 'path' NVP) * - these strings are currently not used in kernel (i.e. for vdev_disk_open) * * single device node example: * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1' * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0' * * multipath device node example: * devid: 'dm-uuid-mpath-35000c5006304de3f' * * We also store the enclosure sysfs path for turning on enclosure LEDs * (if applicable): * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' */ void update_vdev_config_dev_strs(nvlist_t *nv) { vdev_dev_strs_t vds; char *env, *type, *path; uint64_t wholedisk = 0; /* * For the benefit of legacy ZFS implementations, allow * for opting out of devid strings in the vdev label. * * example use: * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer * * explanation: * Older OpenZFS implementations had issues when attempting to * display pool config VDEV names if a "devid" NVP value is * present in the pool's config. * * For example, a pool that originated on illumos platform would * have a devid value in the config and "zpool status" would fail * when listing the config. * * A pool can be stripped of any "devid" values on import or * prevented from adding them on zpool create|add by setting * ZFS_VDEV_DEVID_OPT_OUT. 
*/ env = getenv("ZFS_VDEV_DEVID_OPT_OUT"); if (env && (strtoul(env, NULL, 0) > 0 || !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) { (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); return; } if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 || strcmp(type, VDEV_TYPE_DISK) != 0) { return; } if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) return; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); /* * Update device string values in the config nvlist. */ if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) { (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid); if (vds.vds_devphys[0] != '\0') { (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, vds.vds_devphys); } update_vdev_config_dev_sysfs_path(nv, path, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); } else { /* Clear out any stale entries. */ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); } } diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 98f138957ba6..b9a0b67f2b8c 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1,1903 +1,2001 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2015 RackTop Systems. * Copyright (c) 2016, Intel Corporation. * Copyright (c) 2021, Colm Buckley */ /* * Pool import support functions. * * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since * these commands are expected to run in the global zone, we can assume * that the devices are all readable when called. * * To import a pool, we rely on reading the configuration information from the * ZFS label of each device. If we successfully read the label, then we * organize the configuration information in the following hierarchy: * * pool guid -> toplevel vdev guid -> label txg * * Duplicate entries matching this same tuple will be discarded. Once we have * examined every device, we pick the best label txg config for each toplevel * vdev. We then arrange these toplevel vdevs into a complete pool config, and * update any paths that have changed. Finally, we attempt to import the pool * using our derived config, and record the results. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zutil_import.h" /*PRINTFLIKE2*/ static void zutil_error_aux(libpc_handle_t *hdl, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); (void) vsnprintf(hdl->lpc_desc, sizeof (hdl->lpc_desc), fmt, ap); hdl->lpc_desc_active = B_TRUE; va_end(ap); } static void zutil_verror(libpc_handle_t *hdl, const char *error, const char *fmt, va_list ap) { char action[1024]; (void) vsnprintf(action, sizeof (action), fmt, ap); if (hdl->lpc_desc_active) hdl->lpc_desc_active = B_FALSE; else hdl->lpc_desc[0] = '\0'; if (hdl->lpc_printerr) { if (hdl->lpc_desc[0] != '\0') error = hdl->lpc_desc; (void) fprintf(stderr, "%s: %s\n", action, error); } } /*PRINTFLIKE3*/ static int zutil_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...) { va_list ap; va_start(ap, fmt); zutil_verror(hdl, error, fmt, ap); va_end(ap); return (-1); } static int zutil_error(libpc_handle_t *hdl, const char *error, const char *msg) { return (zutil_error_fmt(hdl, error, "%s", msg)); } static int zutil_no_memory(libpc_handle_t *hdl) { zutil_error(hdl, EZFS_NOMEM, "internal error"); exit(1); } void * zutil_alloc(libpc_handle_t *hdl, size_t size) { void *data; if ((data = calloc(1, size)) == NULL) (void) zutil_no_memory(hdl); return (data); } char * zutil_strdup(libpc_handle_t *hdl, const char *str) { char *ret; if ((ret = strdup(str)) == NULL) (void) zutil_no_memory(hdl); return (ret); } /* * Intermediate structures used to gather configuration information. 
*/ typedef struct config_entry { uint64_t ce_txg; nvlist_t *ce_config; struct config_entry *ce_next; } config_entry_t; typedef struct vdev_entry { uint64_t ve_guid; config_entry_t *ve_configs; struct vdev_entry *ve_next; } vdev_entry_t; typedef struct pool_entry { uint64_t pe_guid; vdev_entry_t *pe_vdevs; struct pool_entry *pe_next; } pool_entry_t; typedef struct name_entry { char *ne_name; uint64_t ne_guid; uint64_t ne_order; uint64_t ne_num_labels; struct name_entry *ne_next; } name_entry_t; typedef struct pool_list { pool_entry_t *pools; name_entry_t *names; } pool_list_t; /* * Go through and fix up any path and/or devid information for the given vdev * configuration. */ static int fix_paths(libpc_handle_t *hdl, nvlist_t *nv, name_entry_t *names) { nvlist_t **child; uint_t c, children; uint64_t guid; name_entry_t *ne, *best; char *path; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if (fix_paths(hdl, child[c], names) != 0) return (-1); return (0); } /* * This is a leaf (file or disk) vdev. In either case, go through * the name list and see if we find a matching guid. If so, replace * the path and see if we can calculate a new devid. * * There may be multiple names associated with a particular guid, in * which case we have overlapping partitions or multiple paths to the * same disk. In this case we prefer to use the path name which * matches the ZPOOL_CONFIG_PATH. If no matching entry is found we * use the lowest order device which corresponds to the first match * while traversing the ZPOOL_IMPORT_PATH search path. 
*/ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) path = NULL; best = NULL; for (ne = names; ne != NULL; ne = ne->ne_next) { if (ne->ne_guid == guid) { if (path == NULL) { best = ne; break; } if ((strlen(path) == strlen(ne->ne_name)) && strncmp(path, ne->ne_name, strlen(path)) == 0) { best = ne; break; } if (best == NULL) { best = ne; continue; } /* Prefer paths with move vdev labels. */ if (ne->ne_num_labels > best->ne_num_labels) { best = ne; continue; } /* Prefer paths earlier in the search order. */ if (ne->ne_num_labels == best->ne_num_labels && ne->ne_order < best->ne_order) { best = ne; continue; } } } if (best == NULL) return (0); if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) return (-1); update_vdev_config_dev_strs(nv); return (0); } /* * Add the given configuration to the list of known devices. */ static int add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path, int order, int num_labels, nvlist_t *config) { uint64_t pool_guid, vdev_guid, top_guid, txg, state; pool_entry_t *pe; vdev_entry_t *ve; config_entry_t *ce; name_entry_t *ne; /* * If this is a hot spare not currently in use or level 2 cache * device, add it to the list of names to translate, but don't do * anything else. */ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state) == 0 && (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) && nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL) return (-1); if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) { free(ne); return (-1); } ne->ne_guid = vdev_guid; ne->ne_order = order; ne->ne_num_labels = num_labels; ne->ne_next = pl->names; pl->names = ne; return (0); } /* * If we have a valid config but cannot read any of these fields, then * it means we have a half-initialized label. 
In vdev_label_init() * we write a label with txg == 0 so that we can identify the device * in case the user refers to the same disk later on. If we fail to * create the pool, we'll be left with a label in this state * which should not be considered part of a valid pool. */ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0 || nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) != 0 || nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid) != 0 || nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) != 0 || txg == 0) { return (0); } /* * First, see if we know about this pool. If not, then add it to the * list of known pools. */ for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { if (pe->pe_guid == pool_guid) break; } if (pe == NULL) { if ((pe = zutil_alloc(hdl, sizeof (pool_entry_t))) == NULL) { return (-1); } pe->pe_guid = pool_guid; pe->pe_next = pl->pools; pl->pools = pe; } /* * Second, see if we know about this toplevel vdev. Add it if its * missing. */ for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { if (ve->ve_guid == top_guid) break; } if (ve == NULL) { if ((ve = zutil_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { return (-1); } ve->ve_guid = top_guid; ve->ve_next = pe->pe_vdevs; pe->pe_vdevs = ve; } /* * Third, see if we have a config with a matching transaction group. If * so, then we do nothing. Otherwise, add it to the list of known * configs. */ for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { if (ce->ce_txg == txg) break; } if (ce == NULL) { if ((ce = zutil_alloc(hdl, sizeof (config_entry_t))) == NULL) { return (-1); } ce->ce_txg = txg; ce->ce_config = fnvlist_dup(config); ce->ce_next = ve->ve_configs; ve->ve_configs = ce; } /* * At this point we've successfully added our config to the list of * known configs. The last thing to do is add the vdev guid -> path * mappings so that we can fix up the configuration as necessary before * doing the import. 
*/ if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL) return (-1); if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) { free(ne); return (-1); } ne->ne_guid = vdev_guid; ne->ne_order = order; ne->ne_num_labels = num_labels; ne->ne_next = pl->names; pl->names = ne; return (0); } static int zutil_pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid, boolean_t *isactive) { ASSERT(hdl->lpc_ops->pco_pool_active != NULL); int error = hdl->lpc_ops->pco_pool_active(hdl->lpc_lib_handle, name, guid, isactive); return (error); } static nvlist_t * zutil_refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig) { ASSERT(hdl->lpc_ops->pco_refresh_config != NULL); return (hdl->lpc_ops->pco_refresh_config(hdl->lpc_lib_handle, tryconfig)); } /* * Determine if the vdev id is a hole in the namespace. */ static boolean_t vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) { int c; for (c = 0; c < holes; c++) { /* Top-level is a hole */ if (hole_array[c] == id) return (B_TRUE); } return (B_FALSE); } /* * Convert our list of pools into the definitive set of configurations. We * start by picking the best config for each toplevel vdev. Once that's done, * we assemble the toplevel vdevs into a full config for the pool. We make a * pass to fix up any incorrect paths, and then add it to the main list to * return to the user. 
*/
/*
 * Parameters:
 *   hdl       - handle used for allocation, error reporting and callbacks.
 *   pl        - pools and guid -> path names gathered by add_config().
 *   active_ok - when set, the active-pool check and the config refresh are
 *               skipped and the assembled config is added as-is.
 *   policy    - optional nvlist stored under ZPOOL_LOAD_POLICY before the
 *               config is refreshed; may be NULL.
 *
 * Returns a newly allocated nvlist holding one config per pool, keyed by
 * pool name, or NULL on error.  Pools reported active, and pools for which
 * the refresh callback returns NULL, are silently left out.
 */
static nvlist_t *
get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
    nvlist_t *policy)
{
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t i, nspares, nl2cache;
	boolean_t config_seen;
	uint64_t best_txg;
	char *name, *hostname = NULL;
	uint64_t guid;
	uint_t children = 0;
	nvlist_t **child = NULL;
	uint64_t *hole_array, max_id;
	uint_t c;
	boolean_t isactive;
	nvlist_t *nvl;
	boolean_t valid_top_config = B_FALSE;

	if (nvlist_alloc(&ret, 0, 0) != 0)
		goto nomem;

	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		uint64_t id, max_txg = 0, hostid = 0;
		uint_t holes = 0;

		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		config_seen = B_FALSE;

		/*
		 * Iterate over all toplevel vdevs.  Grab the pool configuration
		 * from the first one we find, and then go through the rest and
		 * add them as necessary to the 'vdevs' member of the config.
		 */
		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {

			/*
			 * Determine the best configuration for this vdev by
			 * selecting the config with the latest transaction
			 * group.
			 */
			best_txg = 0;
			for (ce = ve->ve_configs; ce != NULL;
			    ce = ce->ce_next) {

				if (ce->ce_txg > best_txg) {
					tmp = ce->ce_config;
					best_txg = ce->ce_txg;
				}
			}

			/*
			 * We rely on the fact that the max txg for the
			 * pool will contain the most up-to-date information
			 * about the valid top-levels in the vdev namespace.
			 */
			if (best_txg > max_txg) {
				/*
				 * A newer label supersedes whatever child
				 * count / hole array was recorded from an
				 * older one.
				 */
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_VDEV_CHILDREN,
				    DATA_TYPE_UINT64);
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_HOLE_ARRAY,
				    DATA_TYPE_UINT64_ARRAY);

				max_txg = best_txg;
				hole_array = NULL;
				holes = 0;
				max_id = 0;
				valid_top_config = B_FALSE;

				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
					verify(nvlist_add_uint64(config,
					    ZPOOL_CONFIG_VDEV_CHILDREN,
					    max_id) == 0);
					valid_top_config = B_TRUE;
				}

				if (nvlist_lookup_uint64_array(tmp,
				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
				    &holes) == 0) {
					verify(nvlist_add_uint64_array(config,
					    ZPOOL_CONFIG_HOLE_ARRAY,
					    hole_array, holes) == 0);
				}
			}

			if (!config_seen) {
				/*
				 * Copy the relevant pieces of data to the pool
				 * configuration:
				 *
				 *	version
				 *	pool guid
				 *	name
				 *	comment (if available)
				 *	compatibility features (if available)
				 *	pool state
				 *	hostid (if available)
				 *	hostname (if available)
				 */
				uint64_t state, version;
				char *comment = NULL;
				char *compatibility = NULL;

				version = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VERSION);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_VERSION, version);
				guid = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_GUID);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_GUID, guid);
				name = fnvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_POOL_NAME);
				fnvlist_add_string(config,
				    ZPOOL_CONFIG_POOL_NAME, name);

				if (nvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_COMMENT, &comment) == 0)
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_COMMENT, comment);

				if (nvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_COMPATIBILITY,
				    &compatibility) == 0)
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_COMPATIBILITY,
					    compatibility);

				state = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_STATE);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_STATE, state);

				hostid = 0;
				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
					fnvlist_add_uint64(config,
					    ZPOOL_CONFIG_HOSTID, hostid);
					hostname = fnvlist_lookup_string(tmp,
					    ZPOOL_CONFIG_HOSTNAME);
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_HOSTNAME, hostname);
				}

				config_seen = B_TRUE;
			}

			/*
			 * Add this top-level vdev to the child array.
			 */
			verify(nvlist_lookup_nvlist(tmp,
			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
			    &id) == 0);

			/* Grow the (zero-filled) child array to fit 'id'. */
			if (id >= children) {
				nvlist_t **newchild;

				newchild = zutil_alloc(hdl, (id + 1) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = id + 1;
			}
			if (nvlist_dup(nvtop, &child[id], 0) != 0)
				goto nomem;

		}

		/*
		 * If we have information about all the top-levels then
		 * clean up the nvlist which we've constructed.  This
		 * means removing any extraneous devices that are
		 * beyond the valid range or adding devices to the end
		 * of our array which appear to be missing.
		 */
		if (valid_top_config) {
			if (max_id < children) {
				for (c = max_id; c < children; c++)
					nvlist_free(child[c]);
				children = max_id;
			} else if (max_id > children) {
				nvlist_t **newchild;

				newchild = zutil_alloc(hdl, (max_id) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = max_id;
			}
		}

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		/*
		 * The vdev namespace may contain holes as a result of
		 * device removal.  We must add them back into the vdev
		 * tree before we process any missing devices.
		 */
		if (holes > 0) {
			ASSERT(valid_top_config);

			for (c = 0; c < children; c++) {
				nvlist_t *holey;

				if (child[c] != NULL ||
				    !vdev_is_hole(hole_array, holes, c))
					continue;

				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;

				/*
				 * Holes in the namespace are treated as
				 * "hole" top-level vdevs and have a
				 * special flag set on them.
				 */
				if (nvlist_add_string(holey,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_HOLE) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(holey);
					goto nomem;
				}
				child[c] = holey;
			}
		}

		/*
		 * Look for any missing top-level vdevs.  If this is the case,
		 * create a faked up 'missing' vdev as a placeholder.  We cannot
		 * simply compress the child array, because the kernel performs
		 * certain checks to make sure the vdev IDs match their location
		 * in the configuration.
		 */
		for (c = 0; c < children; c++) {
			if (child[c] == NULL) {
				nvlist_t *missing;
				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;
				if (nvlist_add_string(missing,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_MISSING) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(missing);
					goto nomem;
				}
				child[c] = missing;
			}
		}

		/*
		 * Put all of this pool's top-level vdevs into a root vdev.
		 */
		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    child, children) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * The child array is released and reset here so that the next
		 * pool starts from an empty array (and so the error path does
		 * not free these entries a second time).
		 */
		for (c = 0; c < children; c++)
			nvlist_free(child[c]);
		free(child);
		children = 0;
		child = NULL;

		/*
		 * Go through and fix up any paths and/or devids based on our
		 * known list of vdev GUID -> path mappings.
		 */
		if (fix_paths(hdl, nvroot, pl->names) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * Add the root vdev to this pool's configuration.
		 */
		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    nvroot) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}
		nvlist_free(nvroot);

		/*
		 * zdb uses this path to report on active pools that were
		 * imported or created using -R.
		 */
		if (active_ok)
			goto add_pool;

		/*
		 * Determine if this pool is currently active, in which case we
		 * can't actually import it.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		if (zutil_pool_active(hdl, name, guid, &isactive) != 0)
			goto error;

		if (isactive) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		if (policy != NULL) {
			if (nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
			    policy) != 0)
				goto nomem;
		}

		if ((nvl = zutil_refresh_config(hdl, config)) == NULL) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		/* From here on 'config' is the refreshed configuration. */
		nvlist_free(config);
		config = nvl;

		/*
		 * Go through and update the paths for spares, now that we have
		 * them.
		 */
		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &spares, &nspares) == 0) {
			for (i = 0; i < nspares; i++) {
				if (fix_paths(hdl, spares[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Update the paths for l2cache devices.
		 */
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2cache, &nl2cache) == 0) {
			for (i = 0; i < nl2cache; i++) {
				if (fix_paths(hdl, l2cache[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Restore the original information read from the actual label.
		 */
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
		    DATA_TYPE_UINT64);
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
		    DATA_TYPE_STRING);
		if (hostid != 0) {
			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
			    hostid) == 0);
			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
			    hostname) == 0);
		}

add_pool:
		/*
		 * Add this pool to the list of configs.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);

		if (nvlist_add_nvlist(ret, name, config) != 0)
			goto nomem;

		nvlist_free(config);
		config = NULL;
	}

	return (ret);

nomem:
	/* zutil_no_memory() exits the process, so the cleanup below only
	 * runs for the 'error' entry point. */
	(void) zutil_no_memory(hdl);
error:
	nvlist_free(config);
	nvlist_free(ret);
	for (c = 0; c < children; c++)
		nvlist_free(child[c]);
	free(child);

	return (NULL);
}

/*
 * Return the offset of the given label.
*/
static uint64_t
label_offset(uint64_t size, int l)
{
	/* 'size' must already be a whole multiple of the label size. */
	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);

	/*
	 * The first half of the labels sits at the front of the device and
	 * the second half occupies the final VDEV_LABELS label-sized slots.
	 */
	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
}

/*
 * The same description applies as to zpool_read_label below,
 * except here we do it without aio, presumably because an aio call
 * errored out in a way we think not using it could circumvent.
 *
 * On return *config holds the first valid label config found (or NULL) and,
 * when num_labels is non-NULL, *num_labels holds the number of valid labels
 * sharing that config's vdev guid.  Both outputs are written on every path,
 * including the early exits.
 *
 * Returns 0 on success (also when the device cannot be stat'ed, in which
 * case no config is returned) and -1 if the read buffer cannot be allocated.
 */
static int
zpool_read_label_slow(int fd, nvlist_t **config, int *num_labels)
{
	struct stat64 statbuf;
	int l, count = 0;
	vdev_phys_t *label;
	nvlist_t *expected_config = NULL;
	uint64_t expected_guid = 0, size;
	int error;

	*config = NULL;
	/* Never leave the caller's label count uninitialized. */
	if (num_labels != NULL)
		*num_labels = 0;

	if (fstat64_blk(fd, &statbuf) == -1)
		return (0);
	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);

	error = posix_memalign((void **)&label, PAGESIZE, sizeof (*label));
	if (error)
		return (-1);

	for (l = 0; l < VDEV_LABELS; l++) {
		uint64_t state, guid, txg;
		nvlist_t *candidate = NULL;
		off_t offset = label_offset(size, l) + VDEV_SKIP_SIZE;

		if (pread64(fd, label, sizeof (vdev_phys_t),
		    offset) != sizeof (vdev_phys_t))
			continue;

		if (nvlist_unpack(label->vp_nvlist,
		    sizeof (label->vp_nvlist), &candidate, 0) != 0)
			continue;

		/* A usable label carries a non-zero vdev guid ... */
		if (nvlist_lookup_uint64(candidate, ZPOOL_CONFIG_GUID,
		    &guid) != 0 || guid == 0) {
			nvlist_free(candidate);
			continue;
		}

		/* ... a known pool state ... */
		if (nvlist_lookup_uint64(candidate, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(candidate);
			continue;
		}

		/* ... and, unless it is a spare or l2cache, a non-zero txg. */
		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(candidate, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(candidate);
			continue;
		}

		if (expected_guid) {
			/* Only labels agreeing with the first one count. */
			if (expected_guid == guid)
				count++;

			nvlist_free(candidate);
		} else {
			expected_config = candidate;
			expected_guid = guid;
			count++;
		}
	}

	if (num_labels != NULL)
		*num_labels = count;

	free(label);
	*config = expected_config;

	return (0);
}

/*
 * Given a file descriptor, read the label information and return an nvlist
 * describing the configuration, if there is one.
The number of valid * labels found will be returned in num_labels when non-NULL. */ int zpool_read_label(int fd, nvlist_t **config, int *num_labels) { struct stat64 statbuf; struct aiocb aiocbs[VDEV_LABELS]; struct aiocb *aiocbps[VDEV_LABELS]; vdev_phys_t *labels; nvlist_t *expected_config = NULL; uint64_t expected_guid = 0, size; int error, l, count = 0; *config = NULL; if (fstat64_blk(fd, &statbuf) == -1) return (0); size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); error = posix_memalign((void **)&labels, PAGESIZE, VDEV_LABELS * sizeof (*labels)); if (error) return (-1); memset(aiocbs, 0, sizeof (aiocbs)); for (l = 0; l < VDEV_LABELS; l++) { off_t offset = label_offset(size, l) + VDEV_SKIP_SIZE; aiocbs[l].aio_fildes = fd; aiocbs[l].aio_offset = offset; aiocbs[l].aio_buf = &labels[l]; aiocbs[l].aio_nbytes = sizeof (vdev_phys_t); aiocbs[l].aio_lio_opcode = LIO_READ; aiocbps[l] = &aiocbs[l]; } if (lio_listio(LIO_WAIT, aiocbps, VDEV_LABELS, NULL) != 0) { int saved_errno = errno; boolean_t do_slow = B_FALSE; error = -1; if (errno == EAGAIN || errno == EINTR || errno == EIO) { /* * A portion of the requests may have been submitted. * Clean them up. */ for (l = 0; l < VDEV_LABELS; l++) { errno = 0; switch (aio_error(&aiocbs[l])) { case EINVAL: break; case EINPROGRESS: // This shouldn't be possible to // encounter, die if we do. ASSERT(B_FALSE); fallthrough; case EOPNOTSUPP: case ENOSYS: do_slow = B_TRUE; fallthrough; case 0: default: (void) aio_return(&aiocbs[l]); } } } if (do_slow) { /* * At least some IO involved access unsafe-for-AIO * files. Let's try again, without AIO this time. 
*/ error = zpool_read_label_slow(fd, config, num_labels); saved_errno = errno; } free(labels); errno = saved_errno; return (error); } for (l = 0; l < VDEV_LABELS; l++) { uint64_t state, guid, txg; if (aio_return(&aiocbs[l]) != sizeof (vdev_phys_t)) continue; if (nvlist_unpack(labels[l].vp_nvlist, sizeof (labels[l].vp_nvlist), config, 0) != 0) continue; if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID, &guid) != 0 || guid == 0) { nvlist_free(*config); continue; } if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, &state) != 0 || state > POOL_STATE_L2CACHE) { nvlist_free(*config); continue; } if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, &txg) != 0 || txg == 0)) { nvlist_free(*config); continue; } if (expected_guid) { if (expected_guid == guid) count++; nvlist_free(*config); } else { expected_config = *config; expected_guid = guid; count++; } } if (num_labels != NULL) *num_labels = count; free(labels); *config = expected_config; return (0); } /* * Sorted by full path and then vdev guid to allow for multiple entries with * the same full path name. This is required because it's possible to * have multiple block devices with labels that refer to the same * ZPOOL_CONFIG_PATH yet have different vdev guids. In this case both * entries need to be added to the cache. Scenarios where this can occur * include overwritten pool labels, devices which are visible from multiple * hosts and multipath devices. 
*/
int
slice_cache_compare(const void *arg1, const void *arg2)
{
	const rdsk_node_t *lhs = arg1;
	const rdsk_node_t *rhs = arg2;
	int order;

	/* Primary key: the device name. */
	order = TREE_ISIGN(strcmp(lhs->rn_name, rhs->rn_name));
	if (order != 0)
		return (order);

	/* Secondary key: the vdev guid. */
	return (TREE_CMP(lhs->rn_vdev_guid, rhs->rn_vdev_guid));
}

/*
 * Walk the vdev tree rooted at nvroot looking for the leaf whose guid equals
 * vdev_guid.  When it is found, *path and *devid are pointed at that leaf's
 * ZPOOL_CONFIG_PATH and ZPOOL_CONFIG_DEVID strings; an output is left
 * untouched if the corresponding property is absent.
 *
 * Returns 0, or the first non-zero value returned by a recursive call.
 */
static int
label_paths_impl(libpc_handle_t *hdl, nvlist_t *nvroot, uint64_t pool_guid,
    uint64_t vdev_guid, char **path, char **devid)
{
	nvlist_t **kids;
	uint_t nkids;
	uint64_t leaf_guid;
	char *str;

	/* Interior vdev: descend into every child and stop on first error. */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &kids, &nkids) == 0) {
		for (uint_t i = 0; i < nkids; i++) {
			int rc = label_paths_impl(hdl, kids[i], pool_guid,
			    vdev_guid, path, devid);

			if (rc != 0)
				return (rc);
		}
		return (0);
	}

	if (nvroot == NULL)
		return (0);

	/* Leaf vdev: ignore it unless it is the one we are looking for. */
	if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_GUID, &leaf_guid) != 0 ||
	    leaf_guid != vdev_guid)
		return (0);

	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &str) == 0)
		*path = str;

	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_DEVID, &str) == 0)
		*devid = str;

	return (0);
}

/*
 * Given a disk label fetch the ZPOOL_CONFIG_PATH and ZPOOL_CONFIG_DEVID
 * and store these strings as config_path and devid_path respectively.
 * The returned pointers are only valid as long as label remains valid.
*/ int label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path, char **devid) { nvlist_t *nvroot; uint64_t pool_guid; uint64_t vdev_guid; *path = NULL; *devid = NULL; if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &pool_guid) || nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &vdev_guid)) return (ENOENT); return (label_paths_impl(hdl, nvroot, pool_guid, vdev_guid, path, devid)); } static void zpool_find_import_scan_add_slice(libpc_handle_t *hdl, pthread_mutex_t *lock, avl_tree_t *cache, const char *path, const char *name, int order) { avl_index_t where; rdsk_node_t *slice; slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); if (asprintf(&slice->rn_name, "%s/%s", path, name) == -1) { free(slice); return; } slice->rn_vdev_guid = 0; slice->rn_lock = lock; slice->rn_avl = cache; slice->rn_hdl = hdl; slice->rn_order = order + IMPORT_ORDER_SCAN_OFFSET; slice->rn_labelpaths = B_FALSE; pthread_mutex_lock(lock); if (avl_find(cache, slice, &where)) { free(slice->rn_name); free(slice); } else { avl_insert(cache, slice, where); } pthread_mutex_unlock(lock); } static int zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock, avl_tree_t *cache, const char *dir, int order) { int error; char path[MAXPATHLEN]; struct dirent64 *dp; DIR *dirp; if (realpath(dir, path) == NULL) { error = errno; if (error == ENOENT) return (0); zutil_error_aux(hdl, strerror(error)); (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext( TEXT_DOMAIN, "cannot resolve path '%s'"), dir); return (error); } dirp = opendir(path); if (dirp == NULL) { error = errno; zutil_error_aux(hdl, strerror(error)); (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); return (error); } while ((dp = readdir64(dirp)) != NULL) { const char *name = dp->d_name; if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' 
&& name[2] == 0))) continue; zpool_find_import_scan_add_slice(hdl, lock, cache, path, name, order); } (void) closedir(dirp); return (0); } static int zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock, avl_tree_t *cache, const char *dir, int order) { int error = 0; char path[MAXPATHLEN]; char *d, *b; char *dpath, *name; /* * Separate the directory part and last part of the * path. We do this so that we can get the realpath of * the directory. We don't get the realpath on the * whole path because if it's a symlink, we want the * path of the symlink not where it points to. */ d = zutil_strdup(hdl, dir); b = zutil_strdup(hdl, dir); dpath = dirname(d); name = basename(b); if (realpath(dpath, path) == NULL) { error = errno; if (error == ENOENT) { error = 0; goto out; } zutil_error_aux(hdl, strerror(error)); (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext( TEXT_DOMAIN, "cannot resolve path '%s'"), dir); goto out; } zpool_find_import_scan_add_slice(hdl, lock, cache, path, name, order); out: free(b); free(d); return (error); } /* * Scan a list of directories for zfs devices. */ static int zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock, avl_tree_t **slice_cache, const char * const *dir, size_t dirs) { avl_tree_t *cache; rdsk_node_t *slice; void *cookie; int i, error; *slice_cache = NULL; cache = zutil_alloc(hdl, sizeof (avl_tree_t)); avl_create(cache, slice_cache_compare, sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); for (i = 0; i < dirs; i++) { struct stat sbuf; if (stat(dir[i], &sbuf) != 0) { error = errno; if (error == ENOENT) continue; zutil_error_aux(hdl, strerror(error)); (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext( TEXT_DOMAIN, "cannot resolve path '%s'"), dir[i]); goto error; } /* * If dir[i] is a directory, we walk through it and add all * the entries to the cache. If it's not a directory, we just * add it to the cache. 
*/ if (S_ISDIR(sbuf.st_mode)) { if ((error = zpool_find_import_scan_dir(hdl, lock, cache, dir[i], i)) != 0) goto error; } else { if ((error = zpool_find_import_scan_path(hdl, lock, cache, dir[i], i)) != 0) goto error; } } *slice_cache = cache; return (0); error: cookie = NULL; while ((slice = avl_destroy_nodes(cache, &cookie)) != NULL) { free(slice->rn_name); free(slice); } free(cache); return (error); } /* * Given a list of directories to search, find all pools stored on disk. This * includes partial pools which are not available to import. If no args are * given (argc is 0), then the default directory (/dev/dsk) is searched. * poolname or guid (but not both) are provided by the caller when trying * to import a specific pool. */ static nvlist_t * zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg, pthread_mutex_t *lock, avl_tree_t *cache) { nvlist_t *ret = NULL; pool_list_t pools = { 0 }; pool_entry_t *pe, *penext; vdev_entry_t *ve, *venext; config_entry_t *ce, *cenext; name_entry_t *ne, *nenext; rdsk_node_t *slice; void *cookie; tpool_t *t; verify(iarg->poolname == NULL || iarg->guid == 0); /* * Create a thread pool to parallelize the process of reading and * validating labels, a large number of threads can be used due to * minimal contention. */ t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); for (slice = avl_first(cache); slice; (slice = avl_walk(cache, slice, AVL_AFTER))) (void) tpool_dispatch(t, zpool_open_func, slice); tpool_wait(t); tpool_destroy(t); /* * Process the cache, filtering out any entries which are not * for the specified pool then adding matching label configs. */ cookie = NULL; while ((slice = avl_destroy_nodes(cache, &cookie)) != NULL) { if (slice->rn_config != NULL) { nvlist_t *config = slice->rn_config; boolean_t matched = B_TRUE; boolean_t aux = B_FALSE; int fd; /* * Check if it's a spare or l2cache device. If it is, * we need to skip the name and guid check since they * don't exist on aux device label. 
*/ if (iarg->poolname != NULL || iarg->guid != 0) { uint64_t state; aux = nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state) == 0 && (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE); } if (iarg->poolname != NULL && !aux) { char *pname; matched = nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0 && strcmp(iarg->poolname, pname) == 0; } else if (iarg->guid != 0 && !aux) { uint64_t this_guid; matched = nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0 && iarg->guid == this_guid; } if (matched) { /* * Verify all remaining entries can be opened * exclusively. This will prune all underlying * multipath devices which otherwise could * result in the vdev appearing as UNAVAIL. * * Under zdb, this step isn't required and * would prevent a zdb -e of active pools with * no cachefile. */ fd = open(slice->rn_name, O_RDONLY | O_EXCL | O_CLOEXEC); if (fd >= 0 || iarg->can_be_active) { if (fd >= 0) close(fd); add_config(hdl, &pools, slice->rn_name, slice->rn_order, slice->rn_num_labels, config); } } nvlist_free(config); } free(slice->rn_name); free(slice); } avl_destroy(cache); free(cache); ret = get_configs(hdl, &pools, iarg->can_be_active, iarg->policy); for (pe = pools.pools; pe != NULL; pe = penext) { penext = pe->pe_next; for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { venext = ve->ve_next; for (ce = ve->ve_configs; ce != NULL; ce = cenext) { cenext = ce->ce_next; nvlist_free(ce->ce_config); free(ce); } free(ve); } free(pe); } for (ne = pools.names; ne != NULL; ne = nenext) { nenext = ne->ne_next; free(ne->ne_name); free(ne); } return (ret); } /* * Given a config, discover the paths for the devices which * exist in the config. 
*/ static int discover_cached_paths(libpc_handle_t *hdl, nvlist_t *nv, avl_tree_t *cache, pthread_mutex_t *lock) { char *path = NULL; uint_t children; nvlist_t **child; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (int c = 0; c < children; c++) { discover_cached_paths(hdl, child[c], cache, lock); } } /* * Once we have the path, we need to add the directory to * our directory cache. */ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { return (zpool_find_import_scan_dir(hdl, lock, cache, dirname(path), 0)); } return (0); } /* * Given a cache file, return the contents as a list of importable pools. * poolname or guid (but not both) are provided by the caller when trying * to import a specific pool. */ static nvlist_t * zpool_find_import_cached(libpc_handle_t *hdl, importargs_t *iarg) { char *buf; int fd; struct stat64 statbuf; nvlist_t *raw, *src, *dst; nvlist_t *pools; nvpair_t *elem; char *name; uint64_t this_guid; boolean_t active; verify(iarg->poolname == NULL || iarg->guid == 0); if ((fd = open(iarg->cachefile, O_RDONLY | O_CLOEXEC)) < 0) { zutil_error_aux(hdl, "%s", strerror(errno)); (void) zutil_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to open cache file")); return (NULL); } if (fstat64(fd, &statbuf) != 0) { zutil_error_aux(hdl, "%s", strerror(errno)); (void) close(fd); (void) zutil_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to get size of cache file")); return (NULL); } if ((buf = zutil_alloc(hdl, statbuf.st_size)) == NULL) { (void) close(fd); return (NULL); } if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { (void) close(fd); free(buf); (void) zutil_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to read cache file contents")); return (NULL); } (void) close(fd); if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) { free(buf); (void) zutil_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "invalid or corrupt cache file contents")); return (NULL); } free(buf); 
/* * Go through and get the current state of the pools and refresh their * state. */ if (nvlist_alloc(&pools, 0, 0) != 0) { (void) zutil_no_memory(hdl); nvlist_free(raw); return (NULL); } elem = NULL; while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) { src = fnvpair_value_nvlist(elem); name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME); if (iarg->poolname != NULL && strcmp(iarg->poolname, name) != 0) continue; this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID); if (iarg->guid != 0 && iarg->guid != this_guid) continue; if (zutil_pool_active(hdl, name, this_guid, &active) != 0) { nvlist_free(raw); nvlist_free(pools); return (NULL); } if (active) continue; if (iarg->scan) { uint64_t saved_guid = iarg->guid; const char *saved_poolname = iarg->poolname; pthread_mutex_t lock; /* * Create the device cache that will hold the * devices we will scan based on the cachefile. * This will get destroyed and freed by * zpool_find_import_impl. */ avl_tree_t *cache = zutil_alloc(hdl, sizeof (avl_tree_t)); avl_create(cache, slice_cache_compare, sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); nvlist_t *nvroot = fnvlist_lookup_nvlist(src, ZPOOL_CONFIG_VDEV_TREE); /* * We only want to find the pool with this_guid. * We will reset these values back later. */ iarg->guid = this_guid; iarg->poolname = NULL; /* * We need to build up a cache of devices that exists * in the paths pointed to by the cachefile. This allows * us to preserve the device namespace that was * originally specified by the user but also lets us * scan devices in those directories in case they had * been renamed. */ pthread_mutex_init(&lock, NULL); discover_cached_paths(hdl, nvroot, cache, &lock); nvlist_t *nv = zpool_find_import_impl(hdl, iarg, &lock, cache); pthread_mutex_destroy(&lock); /* * zpool_find_import_impl will return back * a list of pools that it found based on the * device cache. There should only be one pool * since we're looking for a specific guid. 
* We will use that pool to build up the final * pool nvlist which is returned back to the * caller. */ nvpair_t *pair = nvlist_next_nvpair(nv, NULL); if (pair == NULL) continue; fnvlist_add_nvlist(pools, nvpair_name(pair), fnvpair_value_nvlist(pair)); VERIFY3P(nvlist_next_nvpair(nv, pair), ==, NULL); iarg->guid = saved_guid; iarg->poolname = saved_poolname; continue; } if (nvlist_add_string(src, ZPOOL_CONFIG_CACHEFILE, iarg->cachefile) != 0) { (void) zutil_no_memory(hdl); nvlist_free(raw); nvlist_free(pools); return (NULL); } update_vdevs_config_dev_sysfs_path(src); if ((dst = zutil_refresh_config(hdl, src)) == NULL) { nvlist_free(raw); nvlist_free(pools); return (NULL); } if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) { (void) zutil_no_memory(hdl); nvlist_free(dst); nvlist_free(raw); nvlist_free(pools); return (NULL); } nvlist_free(dst); } nvlist_free(raw); return (pools); } static nvlist_t * zpool_find_import(libpc_handle_t *hdl, importargs_t *iarg) { pthread_mutex_t lock; avl_tree_t *cache; nvlist_t *pools = NULL; verify(iarg->poolname == NULL || iarg->guid == 0); pthread_mutex_init(&lock, NULL); /* * Locate pool member vdevs by blkid or by directory scanning. * On success a newly allocated AVL tree which is populated with an * entry for each discovered vdev will be returned in the cache. * It's the caller's responsibility to consume and destroy this tree. 
*/ if (iarg->scan || iarg->paths != 0) { size_t dirs = iarg->paths; const char * const *dir = (const char * const *)iarg->path; if (dirs == 0) dir = zpool_default_search_paths(&dirs); if (zpool_find_import_scan(hdl, &lock, &cache, dir, dirs) != 0) { pthread_mutex_destroy(&lock); return (NULL); } } else { if (zpool_find_import_blkid(hdl, &lock, &cache) != 0) { pthread_mutex_destroy(&lock); return (NULL); } } pools = zpool_find_import_impl(hdl, iarg, &lock, cache); pthread_mutex_destroy(&lock); return (pools); } nvlist_t * zpool_search_import(void *hdl, importargs_t *import, const pool_config_ops_t *pco) { libpc_handle_t handle = { 0 }; nvlist_t *pools = NULL; handle.lpc_lib_handle = hdl; handle.lpc_ops = pco; handle.lpc_printerr = B_TRUE; verify(import->poolname == NULL || import->guid == 0); if (import->cachefile != NULL) pools = zpool_find_import_cached(&handle, import); else pools = zpool_find_import(&handle, import); if ((pools == NULL || nvlist_empty(pools)) && handle.lpc_open_access_error && geteuid() != 0) { (void) zutil_error(&handle, EZFS_EACESS, dgettext(TEXT_DOMAIN, "no pools found")); } return (pools); } static boolean_t pool_match(nvlist_t *cfg, char *tgt) { uint64_t v, guid = strtoull(tgt, NULL, 0); char *s; if (guid != 0) { if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0) return (v == guid); } else { if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0) return (strcmp(s, tgt) == 0); } return (B_FALSE); } int zpool_find_config(void *hdl, const char *target, nvlist_t **configp, importargs_t *args, const pool_config_ops_t *pco) { nvlist_t *pools; nvlist_t *match = NULL; nvlist_t *config = NULL; char *sepp = NULL; int count = 0; char *targetdup = strdup(target); *configp = NULL; if ((sepp = strpbrk(targetdup, "/@")) != NULL) *sepp = '\0'; pools = zpool_search_import(hdl, args, pco); if (pools != NULL) { nvpair_t *elem = NULL; while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { VERIFY0(nvpair_value_nvlist(elem, &config)); if 
(pool_match(config, targetdup)) { count++; if (match != NULL) { /* multiple matches found */ continue; } else { match = fnvlist_dup(config); } } } fnvlist_free(pools); } if (count == 0) { free(targetdup); return (ENOENT); } if (count > 1) { free(targetdup); fnvlist_free(match); return (EINVAL); } *configp = match; free(targetdup); return (0); } +/* Return if a vdev is a leaf vdev. Note: draid spares are leaf vdevs. */ +static boolean_t +vdev_is_leaf(nvlist_t *nv) +{ + uint_t children = 0; + nvlist_t **child; + + (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children); + + return (children == 0); +} + +/* Return if a vdev is a leaf vdev and a real device (disk or file) */ +static boolean_t +vdev_is_real_leaf(nvlist_t *nv) +{ + char *type = NULL; + if (!vdev_is_leaf(nv)) + return (B_FALSE); + + (void) nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type); + if ((strcmp(type, VDEV_TYPE_DISK) == 0) || + (strcmp(type, VDEV_TYPE_FILE) == 0)) { + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * This function is called by our FOR_EACH_VDEV() macros. + * + * state: State machine status (stored inside of a (nvlist_t *)) + * nv: The current vdev nvlist_t we are iterating over. + * last_nv: The previous vdev nvlist_t we returned to the user in + * the last iteration of FOR_EACH_VDEV(). We use it + * to find the next vdev nvlist_t we should return. + * real_leaves_only: Only return leaf vdevs. + * + * Returns 1 if we found the next vdev nvlist_t for this iteration. 0 if + * we're still searching for it. 
+ */ +static int +__for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv, + boolean_t real_leaves_only) +{ + enum {FIRST_NV = 0, NEXT_IS_MATCH = 1, STOP_LOOKING = 2}; + + /* The very first entry in the NV list is a special case */ + if (*((nvlist_t **)state) == (nvlist_t *)FIRST_NV) { + if (real_leaves_only && !vdev_is_real_leaf(nv)) + return (0); + + *((nvlist_t **)last_nv) = nv; + *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING; + return (1); + } + + /* + * We came across our last_nv, meaning the next one is the one we + * want + */ + if (nv == *((nvlist_t **)last_nv)) { + /* Next iteration of this function will return the nvlist_t */ + *((nvlist_t **)state) = (nvlist_t *)NEXT_IS_MATCH; + return (0); + } + + /* + * We marked NEXT_IS_MATCH on the previous iteration, so this is the one + * we want. + */ + if (*(nvlist_t **)state == (nvlist_t *)NEXT_IS_MATCH) { + if (real_leaves_only && !vdev_is_real_leaf(nv)) + return (0); + + *((nvlist_t **)last_nv) = nv; + *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING; + return (1); + } + + return (0); +} + +int +for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv) +{ + return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_FALSE)); +} + +int +for_each_real_leaf_vdev_macro_helper_func(void *state, nvlist_t *nv, + void *last_nv) +{ + return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_TRUE)); +} + /* * Internal function for iterating over the vdevs. * * For each vdev, func() will be called and will be passed 'zhp' (which is * typically the zpool_handle_t cast as a void pointer), the vdev's nvlist, and * a user-defined data pointer). * * The return values from all the func() calls will be OR'd together and * returned. 
*/ int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func, void *data) { nvlist_t **child; uint_t c, children; int ret = 0; int i; char *type; const char *list[] = { ZPOOL_CONFIG_SPARES, ZPOOL_CONFIG_L2CACHE, ZPOOL_CONFIG_CHILDREN }; for (i = 0; i < ARRAY_SIZE(list); i++) { if (nvlist_lookup_nvlist_array(nv, list[i], &child, &children) == 0) { for (c = 0; c < children; c++) { uint64_t ishole = 0; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &ishole); if (ishole) continue; ret |= for_each_vdev_cb(zhp, child[c], func, data); } } } if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) return (ret); /* Don't run our function on root vdevs */ if (strcmp(type, VDEV_TYPE_ROOT) != 0) { ret |= func(zhp, nv, data); } return (ret); } /* * Given an ZPOOL_CONFIG_VDEV_TREE nvpair, iterate over all the vdevs, calling * func() for each one. func() is passed the vdev's nvlist and an optional * user-defined 'data' pointer. */ int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func, void *data) { return (for_each_vdev_cb(NULL, nvroot, func, data)); } diff --git a/lib/libzutil/zutil_pool.c b/lib/libzutil/zutil_pool.c index 734650f3cffc..eeb7c589ee05 100644 --- a/lib/libzutil/zutil_pool.c +++ b/lib/libzutil/zutil_pool.c @@ -1,145 +1,176 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include +#include #include static void dump_ddt_stat(const ddt_stat_t *dds, int h) { char refcnt[6]; char blocks[6], lsize[6], psize[6], dsize[6]; char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; if (dds == NULL || dds->dds_blocks == 0) return; if (h == -1) (void) strcpy(refcnt, "Total"); else zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt)); zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks)); zfs_nicebytes(dds->dds_lsize, lsize, sizeof (lsize)); zfs_nicebytes(dds->dds_psize, psize, sizeof (psize)); zfs_nicebytes(dds->dds_dsize, dsize, sizeof (dsize)); zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks)); zfs_nicebytes(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize)); zfs_nicebytes(dds->dds_ref_psize, ref_psize, sizeof (ref_psize)); zfs_nicebytes(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize)); (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", refcnt, blocks, lsize, psize, dsize, ref_blocks, ref_lsize, ref_psize, ref_dsize); } /* * Print the DDT histogram and the column totals. 
*/ void zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh) { int h; (void) printf("\n"); (void) printf("bucket " " allocated " " referenced \n"); (void) printf("______ " "______________________________ " "______________________________\n"); (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", "refcnt", "blocks", "LSIZE", "PSIZE", "DSIZE", "blocks", "LSIZE", "PSIZE", "DSIZE"); (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", "------", "------", "-----", "-----", "-----", "------", "-----", "-----", "-----"); for (h = 0; h < 64; h++) dump_ddt_stat(&ddh->ddh_stat[h], h); dump_ddt_stat(dds_total, -1); (void) printf("\n"); } /* * Process the buffer of nvlists, unpacking and storing each nvlist record * into 'records'. 'leftover' is set to the number of bytes that weren't * processed as there wasn't a complete record. */ int zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover, nvlist_t ***records, uint_t *numrecords) { uint64_t reclen; nvlist_t *nv; int i; void *tmp; while (bytes_read > sizeof (reclen)) { /* get length of packed record (stored as little endian) */ for (i = 0, reclen = 0; i < sizeof (reclen); i++) reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i); if (bytes_read < sizeof (reclen) + reclen) break; /* unpack record */ if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0) return (ENOMEM); bytes_read -= sizeof (reclen) + reclen; buf += sizeof (reclen) + reclen; /* add record to nvlist array */ (*numrecords)++; if (ISP2(*numrecords + 1)) { tmp = realloc(*records, *numrecords * 2 * sizeof (nvlist_t *)); if (tmp == NULL) { nvlist_free(nv); (*numrecords)--; return (ENOMEM); } *records = tmp; } (*records)[*numrecords - 1] = nv; } *leftover = bytes_read; return (0); } + +/* + * Floating point sleep(). Allows you to pass in a floating point value for + * seconds. 
+ */ +void +fsleep(float sec) +{ + struct timespec req; + req.tv_sec = floor(sec); + req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC; + nanosleep(&req, NULL); +} + +/* + * Get environment variable 'env' and return it as an integer. + * If 'env' is not set, then return 'default_val' instead. + */ +int +zpool_getenv_int(const char *env, int default_val) +{ + char *str; + int val; + str = getenv(env); + if ((str == NULL) || sscanf(str, "%d", &val) != 1 || + val < 0) { + val = default_val; + } + return (val); +} diff --git a/man/man8/zpool-clear.8 b/man/man8/zpool-clear.8 index 0b256b28bd21..19861a319000 100644 --- a/man/man8/zpool-clear.8 +++ b/man/man8/zpool-clear.8 @@ -1,59 +1,70 @@ .\" .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. .\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions .\" and limitations under the License. .\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. .\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" CDDL HEADER END .\" .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. .\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2018 George Melikov. All Rights Reserved. .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" .Dd May 27, 2021 .Dt ZPOOL-CLEAR 8 .Os . 
.Sh NAME .Nm zpool-clear .Nd clear device errors in ZFS storage pool .Sh SYNOPSIS .Nm zpool .Cm clear +.Op Fl -power .Ar pool .Oo Ar device Oc Ns … . .Sh DESCRIPTION Clears device errors in a pool. If no arguments are specified, all device errors within the pool are cleared. If one or more devices is specified, only those errors associated with the specified device or devices are cleared. .Pp If the pool was suspended it will be brought back online provided the devices can be accessed. Pools with .Sy multihost enabled which have been suspended cannot be resumed. While the pool was suspended, it may have been imported on another host, and resuming I/O could result in pool damage. +.Bl -tag -width Ds +.It Fl -power +Power on the device's slot in the storage enclosure and wait for the device +to show up before attempting to clear errors. +This is done on all the devices specified. +Alternatively, you can set the +.Sy ZPOOL_AUTO_POWER_ON_SLOT +environment variable to always enable this behavior. +Note: This flag currently works on Linux only. +.El . .Sh SEE ALSO .Xr zdb 8 , .Xr zpool-reopen 8 , .Xr zpool-status 8 diff --git a/man/man8/zpool-offline.8 b/man/man8/zpool-offline.8 index 9b2cf59cf414..011cefed2f13 100644 --- a/man/man8/zpool-offline.8 +++ b/man/man8/zpool-offline.8 @@ -1,94 +1,106 @@ .\" .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. .\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions .\" and limitations under the License. .\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" CDDL HEADER END .\" .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. .\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2018 George Melikov. All Rights Reserved. .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" .Dd August 9, 2019 .Dt ZPOOL-OFFLINE 8 .Os . .Sh NAME .Nm zpool-offline .Nd take physical devices offline in ZFS storage pool .Sh SYNOPSIS .Nm zpool .Cm offline -.Op Fl ft +.Op Fl Sy -power Ns | Ns Op Fl Sy ft .Ar pool .Ar device Ns … .Nm zpool .Cm online -.Op Fl e +.Op Fl Sy -power +.Op Fl Sy e .Ar pool .Ar device Ns … . .Sh DESCRIPTION .Bl -tag -width Ds .It Xo .Nm zpool .Cm offline -.Op Fl ft +.Op Fl Sy -power Ns | Ns Op Fl Sy ft .Ar pool .Ar device Ns … .Xc Takes the specified physical device offline. While the .Ar device is offline, no attempt is made to read or write to the device. This command is not applicable to spares. .Bl -tag -width Ds +.It Fl -power +Power off the device's slot in the storage enclosure. +This flag currently works on Linux only .It Fl f Force fault. Instead of offlining the disk, put it into a faulted state. The fault will persist across imports unless the .Fl t flag was specified. .It Fl t Temporary. Upon reboot, the specified physical device reverts to its previous state. .El .It Xo .Nm zpool .Cm online +.Op Fl -power .Op Fl e .Ar pool .Ar device Ns … .Xc Brings the specified physical device online. This command is not applicable to spares. .Bl -tag -width Ds +.It Fl -power +Power on the device's slot in the storage enclosure and wait for the device +to show up before attempting to online it. 
+Alternatively, you can set the +.Sy ZPOOL_AUTO_POWER_ON_SLOT +environment variable to always enable this behavior. +This flag currently works on Linux only .It Fl e Expand the device to use all available space. If the device is part of a mirror or raidz then all devices must be expanded before the new space will become available to the pool. .El .El . .Sh SEE ALSO .Xr zpool-detach 8 , .Xr zpool-remove 8 , .Xr zpool-reopen 8 , .Xr zpool-resilver 8 diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index 7c825f69d8e2..2f0514a2998d 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -1,134 +1,136 @@ .\" .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. .\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions .\" and limitations under the License. .\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. .\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" CDDL HEADER END .\" .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. .\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2018 George Melikov. All Rights Reserved. .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" .Dd June 2, 2021 .Dt ZPOOL-STATUS 8 .Os . 
.Sh NAME .Nm zpool-status .Nd show detailed health status for ZFS storage pools .Sh SYNOPSIS .Nm zpool .Cm status .Op Fl DigLpPstvx .Op Fl T Sy u Ns | Ns Sy d .Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns … .Oo Ar pool Oc Ns … .Op Ar interval Op Ar count . .Sh DESCRIPTION Displays the detailed health status for the given pools. If no .Ar pool is specified, then the status of each pool in the system is displayed. For more information on pool and device health, see the .Sx Device Failure and Recovery section of .Xr zpoolconcepts 7 . .Pp If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change. .Bl -tag -width Ds +.It Fl -power +Display vdev enclosure slot power status (on or off). .It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns … Run a script (or scripts) on each vdev and include the output as a new column in the .Nm zpool Cm status output. See the .Fl c option of .Nm zpool Cm iostat for complete details. .It Fl i Display vdev initialization status. .It Fl g Display vdev GUIDs instead of the normal device names These GUIDs can be used in place of device names for the zpool detach/offline/remove/replace commands. .It Fl L Display real paths for vdevs resolving all symbolic links. This can be used to look up the current block device name regardless of the .Pa /dev/disk/ path used to open it. .It Fl p Display numbers in parsable (exact) values. .It Fl P Display full paths for vdevs instead of only the last component of the path. This can be used in conjunction with the .Fl L flag. .It Fl D Display a histogram of deduplication statistics, showing the allocated .Pq physically present on disk and referenced .Pq logically referenced in the pool block counts and sizes by reference count. .It Fl s Display the number of leaf VDEV slow IOs. 
This is the number of IOs that didn't complete in .Sy zio_slow_io_ms milliseconds (default 30 seconds). This does not necessarily mean the IOs failed to complete, just took an unreasonably long amount of time. This may indicate a problem with the underlying storage. .It Fl t Display vdev TRIM status. .It Fl T Sy u Ns | Ns Sy d Display a time stamp. Specify .Sy u for a printed representation of the internal representation of time. See .Xr time 2 . Specify .Sy d for standard date format. See .Xr date 1 . .It Fl v Displays verbose data error information, printing out a complete list of all data errors since the last complete pool scrub. .It Fl x Only display status for pools that are exhibiting errors or are otherwise unavailable. Warnings about pools not using the latest on-disk format will not be included. .El . .Sh SEE ALSO .Xr zpool-events 8 , .Xr zpool-history 8 , .Xr zpool-iostat 8 , .Xr zpool-list 8 , .Xr zpool-resilver 8 , .Xr zpool-scrub 8 , .Xr zpool-wait 8 diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index e5d7c8515177..591c7772c749 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -1,562 +1,579 @@ .\" .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. .\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions .\" and limitations under the License. .\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
.\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" CDDL HEADER END .\" .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. .\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2018 George Melikov. All Rights Reserved. .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" .Dd June 2, 2021 .Dt ZPOOL 8 .Os . .Sh NAME .Nm zpool .Nd configure ZFS storage pools .Sh SYNOPSIS .Nm .Fl ?V .Nm .Cm version .Nm .Cm subcommand .Op Ar arguments . .Sh DESCRIPTION The .Nm command configures ZFS storage pools. A storage pool is a collection of devices that provides physical storage and data replication for ZFS datasets. All datasets within a storage pool share the same space. See .Xr zfs 8 for information on managing datasets. .Pp For an overview of creating and managing ZFS storage pools see the .Xr zpoolconcepts 7 manual page. . .Sh SUBCOMMANDS All subcommands that modify state are logged persistently to the pool in their original form. .Pp The .Nm command provides subcommands to create and destroy storage pools, add capacity to storage pools, and provide information about the storage pools. The following subcommands are supported: .Bl -tag -width Ds .It Xo .Nm .Fl ?\& .Xc Displays a help message. .It Xo .Nm .Fl V , -version .Xc .It Xo .Nm .Cm version .Xc Displays the software version of the .Nm userland utility and the ZFS kernel module. .El . .Ss Creation .Bl -tag -width Ds .It Xr zpool-create 8 Creates a new storage pool containing the virtual devices specified on the command line.
.It Xr zpool-initialize 8 Begins initializing by writing to all unallocated regions on the specified devices, or all eligible devices in the pool if no individual devices are specified. .El . .Ss Destruction .Bl -tag -width Ds .It Xr zpool-destroy 8 Destroys the given pool, freeing up any devices for other use. .It Xr zpool-labelclear 8 Removes ZFS label information from the specified .Ar device . .El . .Ss Virtual Devices .Bl -tag -width Ds .It Xo .Xr zpool-attach 8 Ns / Ns Xr zpool-detach 8 .Xc Increases or decreases redundancy by .Cm attach Ns ing or .Cm detach Ns ing a device on an existing vdev (virtual device). .It Xo .Xr zpool-add 8 Ns / Ns Xr zpool-remove 8 .Xc Adds the specified virtual devices to the given pool, or removes the specified device from the pool. .It Xr zpool-replace 8 Replaces an existing device (which may be faulted) with a new one. .It Xr zpool-split 8 Creates a new pool by splitting all mirrors in an existing pool (which decreases its redundancy). .El . .Ss Properties Available pool properties listed in the .Xr zpoolprops 7 manual page. .Bl -tag -width Ds .It Xr zpool-list 8 Lists the given pools along with a health status and space usage. .It Xo .Xr zpool-get 8 Ns / Ns Xr zpool-set 8 .Xc Retrieves the given list of properties .Po or all properties if .Sy all is used .Pc for the specified storage pool(s). .El . .Ss Monitoring .Bl -tag -width Ds .It Xr zpool-status 8 Displays the detailed health status for the given pools. .It Xr zpool-iostat 8 Displays logical I/O statistics for the given pools/vdevs. Physical I/Os may be observed via .Xr iostat 1 . .It Xr zpool-events 8 Lists all recent events generated by the ZFS kernel modules. These events are consumed by the .Xr zed 8 and used to automate administrative tasks such as replacing a failed device with a hot spare. That manual page also describes the subclasses and event payloads that can be generated. 
.It Xr zpool-history 8 Displays the command history of the specified pool(s) or all pools if no pool is specified. .El . .Ss Maintenance .Bl -tag -width Ds .It Xr zpool-scrub 8 Begins a scrub or resumes a paused scrub. .It Xr zpool-checkpoint 8 Checkpoints the current state of .Ar pool , which can be later restored by .Nm zpool Cm import Fl -rewind-to-checkpoint . .It Xr zpool-trim 8 Initiates an immediate on-demand TRIM operation for all of the free space in a pool. This operation informs the underlying storage devices of all blocks in the pool which are no longer allocated and allows thinly provisioned devices to reclaim the space. .It Xr zpool-sync 8 This command forces all in-core dirty data to be written to the primary pool storage and not the ZIL. It will also update administrative information including quota reporting. Without arguments, .Nm zpool Cm sync will sync all pools on the system. Otherwise, it will sync only the specified pool(s). .It Xr zpool-upgrade 8 Manage the on-disk format version of storage pools. .It Xr zpool-wait 8 Waits until all background activity of the given types has ceased in the given pool. .El . .Ss Fault Resolution .Bl -tag -width Ds .It Xo .Xr zpool-offline 8 Ns / Ns Xr zpool-online 8 .Xc Takes the specified physical device offline or brings it online. .It Xr zpool-resilver 8 Starts a resilver. If an existing resilver is already running it will be restarted from the beginning. .It Xr zpool-reopen 8 Reopen all the vdevs associated with the pool. .It Xr zpool-clear 8 Clears device errors in a pool. .El . .Ss Import & Export .Bl -tag -width Ds .It Xr zpool-import 8 Make disks containing ZFS storage pools available for use on the system. .It Xr zpool-export 8 Exports the given pools from the system. .It Xr zpool-reguid 8 Generates a new unique identifier for the pool. .El . .Sh EXIT STATUS The following exit values are returned: .Bl -tag -compact -offset 4n -width "a" .It Sy 0 Successful completion. .It Sy 1 An error occurred. 
.It Sy 2 Invalid command line options were specified. .El . .Sh EXAMPLES .Bl -tag -width "Exam" .It Sy Example 1 : No Creating a RAID-Z Storage Pool The following command creates a pool with a single raidz root vdev that consists of six disks: .Dl # Nm zpool Cm create Ar tank Sy raidz Ar sda sdb sdc sdd sde sdf . .It Sy Example 2 : No Creating a Mirrored Storage Pool The following command creates a pool with two mirrors, where each mirror contains two disks: .Dl # Nm zpool Cm create Ar tank Sy mirror Ar sda sdb Sy mirror Ar sdc sdd . .It Sy Example 3 : No Creating a ZFS Storage Pool by Using Partitions The following command creates an unmirrored pool using two disk partitions: .Dl # Nm zpool Cm create Ar tank sda1 sdb2 . .It Sy Example 4 : No Creating a ZFS Storage Pool by Using Files The following command creates an unmirrored pool using files. While not recommended, a pool based on files can be useful for experimental purposes. .Dl # Nm zpool Cm create Ar tank /path/to/file/a /path/to/file/b . .It Sy Example 5 : No Adding a Mirror to a ZFS Storage Pool The following command adds two mirrored disks to the pool .Ar tank , assuming the pool is already made up of two-way mirrors. The additional space is immediately available to any datasets within the pool. .Dl # Nm zpool Cm add Ar tank Sy mirror Ar sda sdb . .It Sy Example 6 : No Listing Available ZFS Storage Pools The following command lists all available pools on the system. In this case, the pool .Ar zion is faulted due to a missing device. The results from this command are similar to the following: .Bd -literal -compact -offset Ds .No # Nm zpool Cm list NAME SIZE ALLOC FREE EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT rpool 19.9G 8.43G 11.4G - 33% 42% 1.00x ONLINE - tank 61.5G 20.0G 41.5G - 48% 32% 1.00x ONLINE - zion - - - - - - - FAULTED - .Ed . 
.It Sy Example 7 : No Destroying a ZFS Storage Pool The following command destroys the pool .Ar tank and any datasets contained within: .Dl # Nm zpool Cm destroy Fl f Ar tank . .It Sy Example 8 : No Exporting a ZFS Storage Pool The following command exports the devices in pool .Ar tank so that they can be relocated or later imported: .Dl # Nm zpool Cm export Ar tank . .It Sy Example 9 : No Importing a ZFS Storage Pool The following command displays available pools, and then imports the pool .Ar tank for use on the system. The results from this command are similar to the following: .Bd -literal -compact -offset Ds .No # Nm zpool Cm import pool: tank id: 15451357997522795478 state: ONLINE action: The pool can be imported using its name or numeric identifier. config: tank ONLINE mirror ONLINE sda ONLINE sdb ONLINE .No # Nm zpool Cm import Ar tank .Ed . .It Sy Example 10 : No Upgrading All ZFS Storage Pools to the Current Version The following command upgrades all ZFS Storage pools to the current version of the software: .Bd -literal -compact -offset Ds .No # Nm zpool Cm upgrade Fl a This system is currently running ZFS version 2. .Ed . .It Sy Example 11 : No Managing Hot Spares The following command creates a new pool with an available hot spare: .Dl # Nm zpool Cm create Ar tank Sy mirror Ar sda sdb Sy spare Ar sdc .Pp If one of the disks were to fail, the pool would be reduced to the degraded state. The failed device can be replaced using the following command: .Dl # Nm zpool Cm replace Ar tank sda sdd .Pp Once the data has been resilvered, the spare is automatically removed and is made available for use should another device fail. The hot spare can be permanently removed from the pool using the following command: .Dl # Nm zpool Cm remove Ar tank sdc . 
.It Sy Example 12 : No Creating a ZFS Pool with Mirrored Separate Intent Logs The following command creates a ZFS storage pool consisting of two, two-way mirrors and mirrored log devices: .Dl # Nm zpool Cm create Ar pool Sy mirror Ar sda sdb Sy mirror Ar sdc sdd Sy log mirror Ar sde sdf . .It Sy Example 13 : No Adding Cache Devices to a ZFS Pool The following command adds two disks for use as cache devices to a ZFS storage pool: .Dl # Nm zpool Cm add Ar pool Sy cache Ar sdc sdd .Pp Once added, the cache devices gradually fill with content from main memory. Depending on the size of your cache devices, it could take over an hour for them to fill. Capacity and reads can be monitored using the .Cm iostat subcommand as follows: .Dl # Nm zpool Cm iostat Fl v Ar pool 5 . .It Sy Example 14 : No Removing a Mirrored top-level (Log or Data) Device The following commands remove the mirrored log device .Sy mirror-2 and mirrored top-level data device .Sy mirror-1 . .Pp Given this configuration: .Bd -literal -compact -offset Ds pool: tank state: ONLINE scrub: none requested config: NAME STATE READ WRITE CKSUM tank ONLINE 0 0 0 mirror-0 ONLINE 0 0 0 sda ONLINE 0 0 0 sdb ONLINE 0 0 0 mirror-1 ONLINE 0 0 0 sdc ONLINE 0 0 0 sdd ONLINE 0 0 0 logs mirror-2 ONLINE 0 0 0 sde ONLINE 0 0 0 sdf ONLINE 0 0 0 .Ed .Pp The command to remove the mirrored log .Ar mirror-2 No is: .Dl # Nm zpool Cm remove Ar tank mirror-2 .Pp The command to remove the mirrored data .Ar mirror-1 No is: .Dl # Nm zpool Cm remove Ar tank mirror-1 . .It Sy Example 15 : No Displaying expanded space on a device The following command displays the detailed information for the pool .Ar data . This pool is comprised of a single raidz vdev where one of its devices increased its capacity by 10GB. In this example, the pool will not be able to utilize this extra capacity until all the devices under the raidz vdev have been expanded. 
.Bd -literal -compact -offset Ds .No # Nm zpool Cm list Fl v Ar data NAME SIZE ALLOC FREE EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT data 23.9G 14.6G 9.30G - 48% 61% 1.00x ONLINE - raidz1 23.9G 14.6G 9.30G - 48% sda - - - - - sdb - - - 10G - sdc - - - - - .Ed . .It Sy Example 16 : No Adding output columns Additional columns can be added to the .Nm zpool Cm status No and Nm zpool Cm iostat No output with Fl c . .Bd -literal -compact -offset Ds .No # Nm zpool Cm status Fl c Ar vendor , Ns Ar model , Ns Ar size NAME STATE READ WRITE CKSUM vendor model size tank ONLINE 0 0 0 mirror-0 ONLINE 0 0 0 U1 ONLINE 0 0 0 SEAGATE ST8000NM0075 7.3T U10 ONLINE 0 0 0 SEAGATE ST8000NM0075 7.3T U11 ONLINE 0 0 0 SEAGATE ST8000NM0075 7.3T U12 ONLINE 0 0 0 SEAGATE ST8000NM0075 7.3T U13 ONLINE 0 0 0 SEAGATE ST8000NM0075 7.3T U14 ONLINE 0 0 0 SEAGATE ST8000NM0075 7.3T .No # Nm zpool Cm iostat Fl vc Ar size capacity operations bandwidth pool alloc free read write read write size ---------- ----- ----- ----- ----- ----- ----- ---- rpool 14.6G 54.9G 4 55 250K 2.69M sda1 14.6G 54.9G 4 55 250K 2.69M 70G ---------- ----- ----- ----- ----- ----- ----- ---- .Ed .El . .Sh ENVIRONMENT VARIABLES -.Bl -tag -compact -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS" +.Bl -tag -compact -width "ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE" .It Sy ZFS_ABORT Cause .Nm to dump core on exit for the purposes of running .Sy ::findleaks . .It Sy ZFS_COLOR Use ANSI color in .Nm zpool status and .Nm zpool iostat output. +.It Sy ZPOOL_AUTO_POWER_ON_SLOT +Automatically attempt to turn on the enclosure slot power to a drive when +running the +.Nm zpool Cm online +or +.Nm zpool Cm clear +commands. +This has the same effect as passing the +.Fl -power +option to those commands. +.It Sy ZPOOL_POWER_ON_SLOT_TIMEOUT_MS +The maximum time in milliseconds to wait for a slot power sysfs value +to return the correct value after writing it.
+For example, after writing "on" to the sysfs enclosure slot power_control file, +it can take some time for the enclosure to power up the slot and return +"on" if you read back the 'power_control' value. +Defaults to 30 seconds (30000ms) if not set. .It Sy ZPOOL_IMPORT_PATH The search path for devices or files to use with the pool. This is a colon-separated list of directories in which .Nm looks for device nodes and files. Similar to the .Fl d option in .Nm zpool import . .It Sy ZPOOL_IMPORT_UDEV_TIMEOUT_MS The maximum time in milliseconds that .Nm zpool import will wait for an expected device to be available. .It Sy ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE If set, suppress warning about non-native vdev ashift in .Nm zpool status . The value is not used, only the presence or absence of the variable matters. .It Sy ZPOOL_VDEV_NAME_GUID Cause .Nm subcommands to output vdev guids by default. This behavior is identical to the .Nm zpool Cm status Fl g command line option. .It Sy ZPOOL_VDEV_NAME_FOLLOW_LINKS Cause .Nm subcommands to follow links for vdev names by default. This behavior is identical to the .Nm zpool Cm status Fl L command line option. .It Sy ZPOOL_VDEV_NAME_PATH Cause .Nm subcommands to output full vdev path names by default. This behavior is identical to the .Nm zpool Cm status Fl P command line option. .It Sy ZFS_VDEV_DEVID_OPT_OUT Older OpenZFS implementations had issues when attempting to display pool config VDEV names if a .Sy devid NVP value is present in the pool's config. .Pp For example, a pool that originated on illumos platform would have a .Sy devid value in the config and .Nm zpool status would fail when listing the config. This would also be true for future Linux-based pools. .Pp A pool can be stripped of any .Sy devid values on import or prevented from adding them on .Nm zpool Cm create or .Nm zpool Cm add by setting .Sy ZFS_VDEV_DEVID_OPT_OUT . .Pp .It Sy ZPOOL_SCRIPTS_AS_ROOT Allow a privileged user to run .Nm zpool status/iostat Fl c .
Normally, only unprivileged users are allowed to run .Fl c . .It Sy ZPOOL_SCRIPTS_PATH The search path for scripts when running .Nm zpool status/iostat Fl c . This is a colon-separated list of directories and overrides the default .Pa ~/.zpool.d and .Pa /etc/zfs/zpool.d search paths. .It Sy ZPOOL_SCRIPTS_ENABLED Allow a user to run .Nm zpool status/iostat Fl c . If .Sy ZPOOL_SCRIPTS_ENABLED is not set, it is assumed that the user is allowed to run .Nm zpool Cm status Ns / Ns Cm iostat Fl c . .El . .Sh INTERFACE STABILITY .Sy Evolving . .Sh SEE ALSO .Xr zfs 4 , .Xr zpool-features 7 , .Xr zpoolconcepts 7 , .Xr zpoolprops 7 , .Xr zed 8 , .Xr zfs 8 , .Xr zpool-add 8 , .Xr zpool-attach 8 , .Xr zpool-checkpoint 8 , .Xr zpool-clear 8 , .Xr zpool-create 8 , .Xr zpool-destroy 8 , .Xr zpool-detach 8 , .Xr zpool-events 8 , .Xr zpool-export 8 , .Xr zpool-get 8 , .Xr zpool-history 8 , .Xr zpool-import 8 , .Xr zpool-initialize 8 , .Xr zpool-iostat 8 , .Xr zpool-labelclear 8 , .Xr zpool-list 8 , .Xr zpool-offline 8 , .Xr zpool-online 8 , .Xr zpool-reguid 8 , .Xr zpool-remove 8 , .Xr zpool-reopen 8 , .Xr zpool-replace 8 , .Xr zpool-resilver 8 , .Xr zpool-scrub 8 , .Xr zpool-set 8 , .Xr zpool-split 8 , .Xr zpool-status 8 , .Xr zpool-sync 8 , .Xr zpool-trim 8 , .Xr zpool-upgrade 8 , .Xr zpool-wait 8