diff --git a/module/os/freebsd/zfs/zio_crypt.c b/module/os/freebsd/zfs/zio_crypt.c index c55c1ac25117..ea120bcb5b03 100644 --- a/module/os/freebsd/zfs/zio_crypt.c +++ b/module/os/freebsd/zfs/zio_crypt.c @@ -1,1814 +1,1829 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. * * CDDL HEADER END */ /* * Copyright (c) 2017, Datto, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include /* * This file is responsible for handling all of the details of generating * encryption parameters and performing encryption and authentication. * * BLOCK ENCRYPTION PARAMETERS: * Encryption /Authentication Algorithm Suite (crypt): * The encryption algorithm, mode, and key length we are going to use. We * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit * keys. All authentication is currently done with SHA512-HMAC. * * Plaintext: * The unencrypted data that we want to encrypt. * * Initialization Vector (IV): * An initialization vector for the encryption algorithms. This is used to * "tweak" the encryption algorithms so that two blocks of the same data are * encrypted into different ciphertext outputs, thus obfuscating block patterns. * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is * never reused with the same encryption key. This value is stored unencrypted * and must simply be provided to the decryption function. We use a 96 bit IV * (as recommended by NIST) for all block encryption. For non-dedup blocks we * derive the IV randomly. 
The first 64 bits of the IV are stored in the second * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of * level 0 blocks is the number of allocated dnodes in that block. The on-disk * format supports at most 2^15 slots per L0 dnode block, because the maximum * block size is 16MB (2^24). In either case, for level 0 blocks this number * will still be smaller than UINT32_MAX so it is safe to store the IV in the * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count * for the dnode code. * * Master key: * This is the most important secret data of an encrypted dataset. It is used * along with the salt to generate the actual encryption keys via HKDF. We * do not use the master key to directly encrypt any data because there are * theoretical limits on how much data can actually be safely encrypted with * any encryption mode. The master key is stored encrypted on disk with the * user's wrapping key. Its length is determined by the encryption algorithm. * For details on how this is stored see the block comment in dsl_crypt.c * * Salt: * Used as an input to the HKDF function, along with the master key. We use a * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt * can be used for encrypting many blocks, so we cache the current salt and the * associated derived key in zio_crypt_t so we do not need to derive it again * needlessly. * * Encryption Key: * A secret binary key, generated from an HKDF function used to encrypt and * decrypt data. * * Message Authentication Code (MAC) * The MAC is an output of authenticated encryption modes such as AES-GCM and * AES-CCM. 
Its purpose is to ensure that an attacker cannot modify encrypted * data on disk and return garbage to the application. Effectively, it is a * checksum that can not be reproduced by an attacker. We store the MAC in the * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated * regular checksum of the ciphertext which can be used for scrubbing. * * OBJECT AUTHENTICATION: * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because * they contain some info that always needs to be readable. To prevent this * data from being altered, we authenticate this data using SHA512-HMAC. This * will produce a MAC (similar to the one produced via encryption) which can * be used to verify the object was not modified. HMACs do not require key * rotation or IVs, so we can keep up to the full 3 copies of authenticated * data. * * ZIL ENCRYPTION: * ZIL blocks have their bp written to disk ahead of the associated data, so we * cannot store the MAC there as we normally do. For these blocks the MAC is * stored in the embedded checksum within the zil_chain_t header. The salt and * IV are generated for the block on bp allocation instead of at encryption * time. In addition, ZIL blocks have some pieces that must be left in plaintext * for claiming even though all of the sensitive user data still needs to be * encrypted. The function zio_crypt_init_uios_zil() handles parsing which * pieces of the block need to be encrypted. All data that is not encrypted is * authenticated using the AAD mechanisms that the supported encryption modes * provide for. In order to preserve the semantics of the ZIL for encrypted * datasets, the ZIL is not protected at the objset level as described below. * * DNODE ENCRYPTION: * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left * in plaintext for scrubbing and claiming, but the bonus buffers might contain * sensitive user data. 
The function zio_crypt_init_uios_dnode() handles parsing * which pieces of the block need to be encrypted. For more details about * dnode authentication and encryption, see zio_crypt_init_uios_dnode(). * * OBJECT SET AUTHENTICATION: * Up to this point, everything we have encrypted and authenticated has been * at level 0 (or -2 for the ZIL). If we did not do any further work the * on-disk format would be susceptible to attacks that deleted or rearranged * the order of level 0 blocks. Ideally, the cleanest solution would be to * maintain a tree of authentication MACs going up the bp tree. However, this * presents a problem for raw sends. Send files do not send information about * indirect blocks so there would be no convenient way to transfer the MACs and * they cannot be recalculated on the receive side without the master key which * would defeat one of the purposes of raw sends in the first place. Instead, * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs * from the level below. We also include some portable fields from blk_prop such * as the lsize and compression algorithm to prevent the data from being * misinterpreted. * * At the objset level, we maintain 2 separate 256 bit MACs in the * objset_phys_t. The first one is "portable" and is the logical root of the * MAC tree maintained in the metadnode's bps. The second, is "local" and is * used as the root MAC for the user accounting objects, which are also not * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload * of the send file. The useraccounting code ensures that the useraccounting * info is not present upon a receive, so the local MAC can simply be cleared * out at that time. For more info about objset_phys_t authentication, see * zio_crypt_do_objset_hmacs(). * * CONSIDERATIONS FOR DEDUP: * In order for dedup to work, blocks that we want to dedup with one another * need to use the same IV and encryption key, so that they will have the same * ciphertext. 
Normally, one should never reuse an IV with the same encryption * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both * blocks. In this case, however, since we are using the same plaintext as * well, all that we end up with is a duplicate of the original ciphertext we * already had. As a result, an attacker with read access to the raw disk will * be able to tell which blocks are the same but this information is given away * by dedup anyway. In order to get the same IVs and encryption keys for * equivalent blocks of data we use an HMAC of the plaintext. We use an HMAC * here so that a reproducible checksum of the plaintext is never available to * the attacker. The HMAC key is kept alongside the master key, encrypted on * disk. The first 64 bits of the HMAC are used in place of the random salt, and * the next 96 bits are used as the IV. As a result of this mechanism, dedup * will only work within a clone family since encrypted dedup requires use of * the same master and HMAC keys. */ /* * After encrypting many blocks with the same key we may start to run up * against the theoretical limits of how much data can securely be encrypted * with a single key using the supported encryption modes. The most obvious * limitation is that our risk of generating 2 equivalent 96 bit IVs increases * the more IVs we generate (which both GCM and CCM modes strictly forbid). * This risk actually grows surprisingly quickly over time according to the * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have * generated n IVs with a cryptographically secure RNG, the approximate * probability p(n) of a collision is given as: * * p(n) ~= 1 - e^(-n*(n-1)/(2*(2^96))) * * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html] * * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion * we must not write more than 398,065,730 blocks with the same encryption key. 
* Therefore, we rotate our keys after 400,000,000 blocks have been written by * generating a new random 64 bit salt for our HKDF encryption key generation * function. */ #define ZFS_KEY_MAX_SALT_USES_DEFAULT 400000000 #define ZFS_CURRENT_MAX_SALT_USES \ (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT)) static unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT; typedef struct blkptr_auth_buf { uint64_t bab_prop; /* blk_prop - portable mask */ uint8_t bab_mac[ZIO_DATA_MAC_LEN]; /* MAC from blk_cksum */ uint64_t bab_pad; /* reserved for future use */ } blkptr_auth_buf_t; const zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { {"", ZC_TYPE_NONE, 0, "inherit"}, {"", ZC_TYPE_NONE, 0, "on"}, {"", ZC_TYPE_NONE, 0, "off"}, {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 16, "aes-128-ccm"}, {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 24, "aes-192-ccm"}, {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 32, "aes-256-ccm"}, {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 16, "aes-128-gcm"}, {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 24, "aes-192-gcm"}, {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 32, "aes-256-gcm"} }; static void zio_crypt_key_destroy_early(zio_crypt_key_t *key) { rw_destroy(&key->zk_salt_lock); /* free crypto templates */ bzero(&key->zk_session, sizeof (key->zk_session)); /* zero out sensitive data */ bzero(key, sizeof (zio_crypt_key_t)); } void zio_crypt_key_destroy(zio_crypt_key_t *key) { freebsd_crypt_freesession(&key->zk_session); zio_crypt_key_destroy_early(key); } int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) { int ret; crypto_mechanism_t mech __unused; uint_t keydata_len; const zio_crypt_info_t *ci = NULL; ASSERT3P(key, !=, NULL); ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ci = &zio_crypt_table[crypt]; if (ci->ci_crypt_type != ZC_TYPE_GCM && ci->ci_crypt_type != ZC_TYPE_CCM) return (ENOTSUP); keydata_len = zio_crypt_table[crypt].ci_keylen; bzero(key, sizeof (zio_crypt_key_t)); rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); /* fill keydata buffers and salt with random data */ ret = 
random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t)); if (ret != 0) goto error; ret = random_get_bytes(key->zk_master_keydata, keydata_len); if (ret != 0) goto error; ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN); if (ret != 0) goto error; ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); if (ret != 0) goto error; /* derive the current key from the master key */ ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); if (ret != 0) goto error; /* initialize keys for the ICP */ key->zk_current_key.ck_format = CRYPTO_KEY_RAW; key->zk_current_key.ck_data = key->zk_current_keydata; key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; key->zk_hmac_key.ck_data = &key->zk_hmac_key; key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); ci = &zio_crypt_table[crypt]; if (ci->ci_crypt_type != ZC_TYPE_GCM && ci->ci_crypt_type != ZC_TYPE_CCM) return (ENOTSUP); ret = freebsd_crypt_newsession(&key->zk_session, ci, &key->zk_current_key); if (ret) goto error; key->zk_crypt = crypt; key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION; key->zk_salt_count = 0; return (0); error: zio_crypt_key_destroy_early(key); return (ret); } static int zio_crypt_key_change_salt(zio_crypt_key_t *key) { int ret = 0; uint8_t salt[ZIO_DATA_SALT_LEN]; crypto_mechanism_t mech __unused; uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen; /* generate a new salt */ ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN); if (ret != 0) goto error; rw_enter(&key->zk_salt_lock, RW_WRITER); /* someone beat us to the salt rotation, just unlock and return */ if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES) goto out_unlock; /* derive the current key from the master key and the new salt */ ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); if (ret != 0) 
goto out_unlock; /* assign the salt and reset the usage count */ bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN); key->zk_salt_count = 0; freebsd_crypt_freesession(&key->zk_session); ret = freebsd_crypt_newsession(&key->zk_session, &zio_crypt_table[key->zk_crypt], &key->zk_current_key); if (ret != 0) goto out_unlock; rw_exit(&key->zk_salt_lock); return (0); out_unlock: rw_exit(&key->zk_salt_lock); error: return (ret); } /* See comment above zfs_key_max_salt_uses definition for details */ int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt) { int ret; boolean_t salt_change; rw_enter(&key->zk_salt_lock, RW_READER); bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN); salt_change = (atomic_inc_64_nv(&key->zk_salt_count) >= ZFS_CURRENT_MAX_SALT_USES); rw_exit(&key->zk_salt_lock); if (salt_change) { ret = zio_crypt_key_change_salt(key); if (ret != 0) goto error; } return (0); error: return (ret); } void *failed_decrypt_buf; int failed_decrypt_size; /* * This function handles all encryption and decryption in zfs. When * encrypting it expects puio to reference the plaintext and cuio to * reference the ciphertext. cuio must have enough space for the * ciphertext + room for a MAC. datalen should be the length of the * plaintext / ciphertext alone. */ /* * The implementation for FreeBSD's OpenCrypto. * * The big difference between ICP and FOC is that FOC uses a single * buffer for input and output. This means that (for AES-GCM, the * only one supported right now) the source must be copied into the * destination, and the destination must have the AAD, and the tag/MAC, * already associated with it. (Both implementations can use a uio.) * * Since the auth data is part of the iovec array, all we need to know * is the length: 0 means there's no AAD. 
* */ static int zio_do_crypt_uio_opencrypto(boolean_t encrypt, freebsd_crypt_session_t *sess, uint64_t crypt, crypto_key_t *key, uint8_t *ivbuf, uint_t datalen, zfs_uio_t *uio, uint_t auth_len) { const zio_crypt_info_t *ci = &zio_crypt_table[crypt]; if (ci->ci_crypt_type != ZC_TYPE_GCM && ci->ci_crypt_type != ZC_TYPE_CCM) return (ENOTSUP); int ret = freebsd_crypt_uio(encrypt, sess, ci, uio, key, ivbuf, datalen, auth_len); if (ret != 0) { #ifdef FCRYPTO_DEBUG printf("%s(%d): Returning error %s\n", __FUNCTION__, __LINE__, encrypt ? "EIO" : "ECKSUM"); #endif ret = SET_ERROR(encrypt ? EIO : ECKSUM); } return (ret); } int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) { int ret; uint64_t aad[3]; /* * With OpenCrypto in FreeBSD, the same buffer is used for * input and output. Also, the AAD (for AES-GMC at least) * needs to logically go in front. */ zfs_uio_t cuio; struct uio cuio_s; iovec_t iovecs[4]; uint64_t crypt = key->zk_crypt; uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); zfs_uio_init(&cuio, &cuio_s); keydata_len = zio_crypt_table[crypt].ci_keylen; /* generate iv for wrapping the master and hmac key */ ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN); if (ret != 0) goto error; /* * Since we only support one buffer, we need to copy * the plain text (source) to the cipher buffer (dest). * We set iovecs[0] -- the authentication data -- below. 
*/ bcopy((void*)key->zk_master_keydata, keydata_out, keydata_len); bcopy((void*)key->zk_hmac_keydata, hmac_keydata_out, SHA512_HMAC_KEYLEN); iovecs[1].iov_base = keydata_out; iovecs[1].iov_len = keydata_len; iovecs[2].iov_base = hmac_keydata_out; iovecs[2].iov_len = SHA512_HMAC_KEYLEN; iovecs[3].iov_base = mac; iovecs[3].iov_len = WRAPPING_MAC_LEN; /* * Although we don't support writing to the old format, we do * support rewrapping the key so that the user can move and * quarantine datasets on the old format. */ if (key->zk_version == 0) { aad_len = sizeof (uint64_t); aad[0] = LE_64(key->zk_guid); } else { ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); aad_len = sizeof (uint64_t) * 3; aad[0] = LE_64(key->zk_guid); aad[1] = LE_64(crypt); aad[2] = LE_64(key->zk_version); } iovecs[0].iov_base = aad; iovecs[0].iov_len = aad_len; enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN; GET_UIO_STRUCT(&cuio)->uio_iov = iovecs; zfs_uio_iovcnt(&cuio) = 4; zfs_uio_segflg(&cuio) = UIO_SYSSPACE; /* encrypt the keys and store the resulting ciphertext and mac */ ret = zio_do_crypt_uio_opencrypto(B_TRUE, NULL, crypt, cwkey, iv, enc_len, &cuio, aad_len); if (ret != 0) goto error; return (0); error: return (ret); } int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, zio_crypt_key_t *key) { int ret; uint64_t aad[3]; /* * With OpenCrypto in FreeBSD, the same buffer is used for * input and output. Also, the AAD (for AES-GMC at least) * needs to logically go in front. 
*/ zfs_uio_t cuio; struct uio cuio_s; iovec_t iovecs[4]; void *src, *dst; uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); keydata_len = zio_crypt_table[crypt].ci_keylen; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); zfs_uio_init(&cuio, &cuio_s); /* * Since we only support one buffer, we need to copy * the encrypted buffer (source) to the plain buffer * (dest). We set iovecs[0] -- the authentication data -- * below. */ dst = key->zk_master_keydata; src = keydata; bcopy(src, dst, keydata_len); dst = key->zk_hmac_keydata; src = hmac_keydata; bcopy(src, dst, SHA512_HMAC_KEYLEN); iovecs[1].iov_base = key->zk_master_keydata; iovecs[1].iov_len = keydata_len; iovecs[2].iov_base = key->zk_hmac_keydata; iovecs[2].iov_len = SHA512_HMAC_KEYLEN; iovecs[3].iov_base = mac; iovecs[3].iov_len = WRAPPING_MAC_LEN; if (version == 0) { aad_len = sizeof (uint64_t); aad[0] = LE_64(guid); } else { ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); aad_len = sizeof (uint64_t) * 3; aad[0] = LE_64(guid); aad[1] = LE_64(crypt); aad[2] = LE_64(version); } enc_len = keydata_len + SHA512_HMAC_KEYLEN; iovecs[0].iov_base = aad; iovecs[0].iov_len = aad_len; GET_UIO_STRUCT(&cuio)->uio_iov = iovecs; zfs_uio_iovcnt(&cuio) = 4; zfs_uio_segflg(&cuio) = UIO_SYSSPACE; /* decrypt the keys and store the result in the output buffers */ ret = zio_do_crypt_uio_opencrypto(B_FALSE, NULL, crypt, cwkey, iv, enc_len, &cuio, aad_len); if (ret != 0) goto error; /* generate a fresh salt */ ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); if (ret != 0) goto error; /* derive the current key from the master key */ ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); if (ret != 0) goto error; /* initialize keys for ICP */ key->zk_current_key.ck_format = CRYPTO_KEY_RAW; key->zk_current_key.ck_data = key->zk_current_keydata; 
key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; key->zk_hmac_key.ck_data = key->zk_hmac_keydata; key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); ret = freebsd_crypt_newsession(&key->zk_session, &zio_crypt_table[crypt], &key->zk_current_key); if (ret != 0) goto error; key->zk_crypt = crypt; key->zk_version = version; key->zk_guid = guid; key->zk_salt_count = 0; return (0); error: zio_crypt_key_destroy_early(key); return (ret); } int zio_crypt_generate_iv(uint8_t *ivbuf) { int ret; /* randomly generate the IV */ ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN); if (ret != 0) goto error; return (0); error: bzero(ivbuf, ZIO_DATA_IV_LEN); return (ret); } int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, uint8_t *digestbuf, uint_t digestlen) { uint8_t raw_digestbuf[SHA512_DIGEST_LENGTH]; ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH); crypto_mac(&key->zk_hmac_key, data, datalen, raw_digestbuf, SHA512_DIGEST_LENGTH); bcopy(raw_digestbuf, digestbuf, digestlen); return (0); } int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, uint8_t *ivbuf, uint8_t *salt) { int ret; uint8_t digestbuf[SHA512_DIGEST_LENGTH]; ret = zio_crypt_do_hmac(key, data, datalen, digestbuf, SHA512_DIGEST_LENGTH); if (ret != 0) return (ret); bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN); bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN); return (0); } /* * The following functions are used to encode and decode encryption parameters * into blkptr_t and zil_header_t. The ICP wants to use these parameters as * byte strings, which normally means that these strings would not need to deal * with byteswapping at all. However, both blkptr_t and zil_header_t may be * byteswapped by lower layers and so we must "undo" that byteswap here upon * decoding and encoding in a non-native byteorder. 
These functions require * that the byteorder bit is correct before being called. */ void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) { uint64_t val64; uint32_t val32; ASSERT(BP_IS_ENCRYPTED(bp)); if (!BP_SHOULD_BYTESWAP(bp)) { bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); BP_SET_IV2(bp, val32); } else { bcopy(salt, &val64, sizeof (uint64_t)); bp->blk_dva[2].dva_word[0] = BSWAP_64(val64); bcopy(iv, &val64, sizeof (uint64_t)); bp->blk_dva[2].dva_word[1] = BSWAP_64(val64); bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); BP_SET_IV2(bp, BSWAP_32(val32)); } } void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) { uint64_t val64; uint32_t val32; ASSERT(BP_IS_PROTECTED(bp)); /* for convenience, so callers don't need to check */ if (BP_IS_AUTHENTICATED(bp)) { bzero(salt, ZIO_DATA_SALT_LEN); bzero(iv, ZIO_DATA_IV_LEN); return; } if (!BP_SHOULD_BYTESWAP(bp)) { bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t)); bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof (uint64_t)); val32 = (uint32_t)BP_GET_IV2(bp); bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); } else { val64 = BSWAP_64(bp->blk_dva[2].dva_word[0]); bcopy(&val64, salt, sizeof (uint64_t)); val64 = BSWAP_64(bp->blk_dva[2].dva_word[1]); bcopy(&val64, iv, sizeof (uint64_t)); val32 = BSWAP_32((uint32_t)BP_GET_IV2(bp)); bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); } } void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) { uint64_t val64; ASSERT(BP_USES_CRYPT(bp)); ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET); if (!BP_SHOULD_BYTESWAP(bp)) { bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], sizeof (uint64_t)); } else { bcopy(mac, &val64, sizeof (uint64_t)); bp->blk_cksum.zc_word[2] = BSWAP_64(val64); bcopy(mac + sizeof (uint64_t), &val64, 
sizeof (uint64_t)); bp->blk_cksum.zc_word[3] = BSWAP_64(val64); } } void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac) { uint64_t val64; ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp)); /* for convenience, so callers don't need to check */ if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { bzero(mac, ZIO_DATA_MAC_LEN); return; } if (!BP_SHOULD_BYTESWAP(bp)) { bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t)); bcopy(&bp->blk_cksum.zc_word[3], mac + sizeof (uint64_t), sizeof (uint64_t)); } else { val64 = BSWAP_64(bp->blk_cksum.zc_word[2]); bcopy(&val64, mac, sizeof (uint64_t)); val64 = BSWAP_64(bp->blk_cksum.zc_word[3]); bcopy(&val64, mac + sizeof (uint64_t), sizeof (uint64_t)); } } void zio_crypt_encode_mac_zil(void *data, uint8_t *mac) { zil_chain_t *zilc = data; bcopy(mac, &zilc->zc_eck.zec_cksum.zc_word[2], sizeof (uint64_t)); bcopy(mac + sizeof (uint64_t), &zilc->zc_eck.zec_cksum.zc_word[3], sizeof (uint64_t)); } void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac) { /* * The ZIL MAC is embedded in the block it protects, which will * not have been byteswapped by the time this function has been called. * As a result, we don't need to worry about byteswapping the MAC. */ const zil_chain_t *zilc = data; bcopy(&zilc->zc_eck.zec_cksum.zc_word[2], mac, sizeof (uint64_t)); bcopy(&zilc->zc_eck.zec_cksum.zc_word[3], mac + sizeof (uint64_t), sizeof (uint64_t)); } /* * This routine takes a block of dnodes (src_abd) and copies only the bonus * buffers to the same offsets in the dst buffer. datalen should be the size * of both the src_abd and the dst buffer (not just the length of the bonus * buffers). 
*/ void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) { uint_t i, max_dnp = datalen >> DNODE_SHIFT; uint8_t *src; dnode_phys_t *dnp, *sdnp, *ddnp; src = abd_borrow_buf_copy(src_abd, datalen); sdnp = (dnode_phys_t *)src; ddnp = (dnode_phys_t *)dst; for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { dnp = &sdnp[i]; if (dnp->dn_type != DMU_OT_NONE && DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && dnp->dn_bonuslen != 0) { bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), DN_MAX_BONUS_LEN(dnp)); } } abd_return_buf(src_abd, src, datalen); } /* * This function decides what fields from blk_prop are included in * the on-disk various MAC algorithms. */ static void zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) { int avoidlint = SPA_MINBLOCKSIZE; /* * Version 0 did not properly zero out all non-portable fields * as it should have done. We maintain this code so that we can * do read-only imports of pools on this version. */ if (version == 0) { BP_SET_DEDUP(bp, 0); BP_SET_CHECKSUM(bp, 0); BP_SET_PSIZE(bp, avoidlint); return; } ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); /* * The hole_birth feature might set these fields even if this bp * is a hole. We zero them out here to guarantee that raw sends * will function with or without the feature. */ if (BP_IS_HOLE(bp)) { bp->blk_prop = 0ULL; return; } /* * At L0 we want to verify these fields to ensure that data blocks * can not be reinterpreted. For instance, we do not want an attacker * to trick us into returning raw lz4 compressed data to the user * by modifying the compression bits. At higher levels, we cannot * enforce this policy since raw sends do not convey any information * about indirect blocks, so these values might be different on the * receive side. Fortunately, this does not open any new attack * vectors, since any alterations that can be made to a higher level * bp must still verify the correct order of the layer below it. 
*/ if (BP_GET_LEVEL(bp) != 0) { BP_SET_BYTEORDER(bp, 0); BP_SET_COMPRESS(bp, 0); /* * psize cannot be set to zero or it will trigger * asserts, but the value doesn't really matter as * long as it is constant. */ BP_SET_PSIZE(bp, avoidlint); } BP_SET_DEDUP(bp, 0); BP_SET_CHECKSUM(bp, 0); } static void zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp, blkptr_auth_buf_t *bab, uint_t *bab_len) { blkptr_t tmpbp = *bp; if (should_bswap) byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); ASSERT0(BP_IS_EMBEDDED(&tmpbp)); zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac); /* * We always MAC blk_prop in LE to ensure portability. This * must be done after decoding the mac, since the endianness * will get zero'd out here. */ zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version); bab->bab_prop = LE_64(tmpbp.blk_prop); bab->bab_pad = 0ULL; /* version 0 did not include the padding */ *bab_len = sizeof (blkptr_auth_buf_t); if (version == 0) *bab_len -= sizeof (uint64_t); } static int zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version, boolean_t should_bswap, blkptr_t *bp) { uint_t bab_len; blkptr_auth_buf_t bab; zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); crypto_mac_update(ctx, &bab, bab_len); return (0); } static void zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version, boolean_t should_bswap, blkptr_t *bp) { uint_t bab_len; blkptr_auth_buf_t bab; zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); SHA2Update(ctx, &bab, bab_len); } static void zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version, boolean_t should_bswap, blkptr_t *bp) { uint_t bab_len; blkptr_auth_buf_t bab; zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); bcopy(&bab, *aadp, bab_len); *aadp += bab_len; *aad_len += bab_len; } static int zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version, boolean_t 
should_bswap, dnode_phys_t *dnp) { int ret, i; dnode_phys_t *adnp; boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); uint8_t tmp_dncore[offsetof(dnode_phys_t, dn_blkptr)]; /* authenticate the core dnode (masking out non-portable bits) */ bcopy(dnp, tmp_dncore, sizeof (tmp_dncore)); adnp = (dnode_phys_t *)tmp_dncore; if (le_bswap) { adnp->dn_datablkszsec = BSWAP_16(adnp->dn_datablkszsec); adnp->dn_bonuslen = BSWAP_16(adnp->dn_bonuslen); adnp->dn_maxblkid = BSWAP_64(adnp->dn_maxblkid); adnp->dn_used = BSWAP_64(adnp->dn_used); } adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; adnp->dn_used = 0; crypto_mac_update(ctx, adnp, sizeof (tmp_dncore)); for (i = 0; i < dnp->dn_nblkptr; i++) { ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, &dnp->dn_blkptr[i]); if (ret != 0) goto error; } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, DN_SPILL_BLKPTR(dnp)); if (ret != 0) goto error; } return (0); error: return (ret); } /* * objset_phys_t blocks introduce a number of exceptions to the normal * authentication process. objset_phys_t's contain 2 separate HMACS for * protecting the integrity of their data. The portable_mac protects the * metadnode. This MAC can be sent with a raw send and protects against * reordering of data within the metadnode. The local_mac protects the user * accounting objects which are not sent from one system to another. * * In addition, objset blocks are the only blocks that can be modified and * written to disk without the key loaded under certain circumstances. During * zil_claim() we need to be able to update the zil_header_t to complete * claiming log blocks and during raw receives we need to write out the * portable_mac from the send file. Both of these actions are possible * because these fields are not protected by either MAC so neither one will * need to modify the MACs without the key. 
However, when the modified blocks
 * are written out they will be byteswapped into the host machine's native
 * endianness which will modify fields protected by the MAC. As a result, MAC
 * calculation for objset blocks works slightly differently from other block
 * types. Where other block types MAC the data in whatever endianness is
 * written to disk, objset blocks always MAC little endian version of their
 * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP()
 * and le_bswap indicates whether a byteswap is needed to get this block
 * into little endian format.
 *
 * Inputs and outputs of zio_crypt_do_objset_hmacs():
 *   key           - supplies zk_hmac_key (HMAC key) and zk_version.
 *   data, datalen - the objset_phys_t buffer and its length; datalen selects
 *                   which accounting dnodes are considered present.
 *   should_bswap  - see above.
 *   portable_mac  - receives the first ZIO_OBJSET_MAC_LEN bytes of the HMAC
 *                   over os_type, the portable os_flags and the metadnode.
 *   local_mac     - receives the first ZIO_OBJSET_MAC_LEN bytes of the HMAC
 *                   over the non-portable os_flags and the accounting dnodes,
 *                   or all zeroes when the local MAC does not apply.
 * Returns 0 on success. If a dnode update fails, both output MACs are zeroed
 * and that error is returned.
 *
 * NOTE(review): os_type below is byteswapped when le_bswap is false, while
 * the dnode helper swaps when le_bswap is true. These values feed on-disk
 * MACs, so confirm against existing pools before "fixing" either one.
 */
int
zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
    boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac)
{
	int ret;
	struct hmac_ctx hash_ctx;
	struct hmac_ctx *ctx = &hash_ctx;
	objset_phys_t *osp = data;
	uint64_t intval;
	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
	uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH];
	uint8_t raw_local_mac[SHA512_DIGEST_LENGTH];

	/* calculate the portable MAC from the portable fields and metadnode */
	crypto_mac_init(ctx, &key->zk_hmac_key);

	/* add in the os_type */
	intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type);
	crypto_mac_update(ctx, &intval, sizeof (uint64_t));

	/* add in the portable os_flags */
	intval = osp->os_flags;
	if (should_bswap)
		intval = BSWAP_64(intval);
	intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
	if (!ZFS_HOST_BYTEORDER)
		intval = BSWAP_64(intval);

	crypto_mac_update(ctx, &intval, sizeof (uint64_t));

	/* add in fields from the metadnode */
	ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
	    should_bswap, &osp->os_meta_dnode);
	if (ret)
		goto error;

	crypto_mac_final(ctx, raw_portable_mac, SHA512_DIGEST_LENGTH);

	bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN);

+	/*
+	 * This is necessary here as we check next whether
+	 * OBJSET_FLAG_USERACCOUNTING_COMPLETE is set in order to
+	 * decide if the local_mac should be zeroed out. That flag will always
+	 * be set by dmu_objset_id_quota_upgrade_cb() and
+	 * dmu_objset_userspace_upgrade_cb() if useraccounting has been
+	 * completed.
+	 */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	boolean_t uacct_incomplete =
+	    !(intval & OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+
	/*
	 * The local MAC protects the user, group and project accounting.
	 * If these objects are not present, the local MAC is zeroed out.
	 */
-	if ((datalen >= OBJSET_PHYS_SIZE_V3 &&
+	if (uacct_incomplete ||
+	    (datalen >= OBJSET_PHYS_SIZE_V3 &&
	    osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE &&
	    osp->os_projectused_dnode.dn_type == DMU_OT_NONE) ||
	    (datalen >= OBJSET_PHYS_SIZE_V2 &&
	    osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE) ||
	    (datalen <= OBJSET_PHYS_SIZE_V1)) {
		bzero(local_mac, ZIO_OBJSET_MAC_LEN);
		return (0);
	}

	/* calculate the local MAC from the userused and groupused dnodes */
	crypto_mac_init(ctx, &key->zk_hmac_key);

	/* add in the non-portable os_flags */
	intval = osp->os_flags;
	if (should_bswap)
		intval = BSWAP_64(intval);
	intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
	if (!ZFS_HOST_BYTEORDER)
		intval = BSWAP_64(intval);

	crypto_mac_update(ctx, &intval, sizeof (uint64_t));

	/* XXX check dnode type ... */
	/* add in fields from the user accounting dnodes */
	if (osp->os_userused_dnode.dn_type != DMU_OT_NONE) {
		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
		    should_bswap, &osp->os_userused_dnode);
		if (ret)
			goto error;
	}

	if (osp->os_groupused_dnode.dn_type != DMU_OT_NONE) {
		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
		    should_bswap, &osp->os_groupused_dnode);
		if (ret)
			goto error;
	}

	if (osp->os_projectused_dnode.dn_type != DMU_OT_NONE &&
	    datalen >= OBJSET_PHYS_SIZE_V3) {
		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
		    should_bswap, &osp->os_projectused_dnode);
		if (ret)
			goto error;
	}

	crypto_mac_final(ctx, raw_local_mac, SHA512_DIGEST_LENGTH);

	bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN);

	return (0);

error:
	bzero(portable_mac, ZIO_OBJSET_MAC_LEN);
	bzero(local_mac, ZIO_OBJSET_MAC_LEN);
	return (ret);
}

/*
 * Free the iovec array attached to a uio, if there is one. The array size is
 * taken from the uio's own iovec count.
 */
static void
zio_crypt_destroy_uio(zfs_uio_t *uio)
{
	if (GET_UIO_STRUCT(uio)->uio_iov)
		kmem_free(GET_UIO_STRUCT(uio)->uio_iov,
		    zfs_uio_iovcnt(uio) * sizeof (iovec_t));
}

/*
 * This function parses an uncompressed indirect block and returns a checksum
 * of all the portable fields from all of the contained bps. The portable
 * fields are the MAC and all of the fields from blk_prop except for the dedup,
 * checksum, and psize bits. For an explanation of the purpose of this, see
 * the comment block on object set authentication.
*/ static int zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf, uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum) { blkptr_t *bp; int i, epb = datalen >> SPA_BLKPTRSHIFT; SHA2_CTX ctx; uint8_t digestbuf[SHA512_DIGEST_LENGTH]; /* checksum all of the MACs from the layer below */ SHA2Init(SHA512, &ctx); for (i = 0, bp = buf; i < epb; i++, bp++) { zio_crypt_bp_do_indrect_checksum_updates(&ctx, version, byteswap, bp); } SHA2Final(digestbuf, &ctx); if (generate) { bcopy(digestbuf, cksum, ZIO_DATA_MAC_LEN); return (0); } if (bcmp(digestbuf, cksum, ZIO_DATA_MAC_LEN) != 0) { #ifdef FCRYPTO_DEBUG printf("%s(%d): Setting ECKSUM\n", __FUNCTION__, __LINE__); #endif return (SET_ERROR(ECKSUM)); } return (0); } int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, uint_t datalen, boolean_t byteswap, uint8_t *cksum) { int ret; /* * Unfortunately, callers of this function will not always have * easy access to the on-disk format version. This info is * normally found in the DSL Crypto Key, but the checksum-of-MACs * is expected to be verifiable even when the key isn't loaded. * Here, instead of doing a ZAP lookup for the version for each * zio, we simply try both existing formats. */ ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum); if (ret == ECKSUM) { ASSERT(!generate); ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, datalen, 0, byteswap, cksum); } return (ret); } int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, uint_t datalen, boolean_t byteswap, uint8_t *cksum) { int ret; void *buf; buf = abd_borrow_buf_copy(abd, datalen); ret = zio_crypt_do_indirect_mac_checksum(generate, buf, datalen, byteswap, cksum); abd_return_buf(abd, buf, datalen); return (ret); } /* * Special case handling routine for encrypting / decrypting ZIL blocks. 
* We do not check for the older ZIL chain because the encryption feature
 * was not available before the newer ZIL chain was introduced. The goal
 * here is to encrypt everything except the blkptr_t of a lr_write_t and
 * the zil_chain_t header. Everything that is not encrypted is authenticated.
 */

/*
 * The OpenCrypto used in FreeBSD does not use separate source and
 * destination buffers; instead, the same buffer is used.  Further, to
 * accommodate some of the drivers, the authbuf needs to be logically before
 * the data.  This means that we need to copy the source to the destination,
 * and set up an extra iovec_t at the beginning to handle the authbuf.
 * It also means we'll only return one zfs_uio_t.
 *
 * Outputs:
 *   out_uio   - receives a freshly allocated iovec array: entry 0 is the
 *               AAD buffer, the last entry is left empty for the MAC, and
 *               the entries in between cover the regions of the destination
 *               buffer that are to be encrypted / decrypted.
 *   enc_len   - total length of those encrypted regions.
 *   authbuf   - the AAD buffer (allocated here with zio_buf_alloc(datalen)).
 *   auth_len  - number of AAD bytes written into authbuf.
 *   no_crypt  - B_TRUE when no encrypted iovec was produced.
 * puio is unused. Always returns 0.
 *
 * NOTE(review): record lengths (lrc_reclen) and zc_nused are read from the
 * block and are not range-checked in this function; presumably the caller
 * has validated the block -- confirm.
 */
static int
zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
    uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, zfs_uio_t *puio,
    zfs_uio_t *out_uio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
    boolean_t *no_crypt)
{
	(void) puio;
	uint8_t *aadbuf = zio_buf_alloc(datalen);
	uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp;
	iovec_t *dst_iovecs;
	zil_chain_t *zilc;
	lr_t *lr;
	uint64_t txtype, lr_len;
	uint_t crypt_len, nr_iovecs, vec;
	uint_t aad_len = 0, total_len = 0;

	if (encrypt) {
		src = plainbuf;
		dst = cipherbuf;
	} else {
		src = cipherbuf;
		dst = plainbuf;
	}
	/* the crypto operates in place, so start dst as a copy of src */
	bcopy(src, dst, datalen);

	/* Find the start and end record of the log block. */
	zilc = (zil_chain_t *)src;
	slrp = src + sizeof (zil_chain_t);
	aadp = aadbuf;
	blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);

	/*
	 * Calculate the number of encrypted iovecs we will need.
	 */

	/* We need at least two iovecs -- one for the AAD, one for the MAC. */
	nr_iovecs = 2;

	for (; slrp < blkend; slrp += lr_len) {
		lr = (lr_t *)slrp;

		if (byteswap) {
			txtype = BSWAP_64(lr->lrc_txtype);
			lr_len = BSWAP_64(lr->lrc_reclen);
		} else {
			txtype = lr->lrc_txtype;
			lr_len = lr->lrc_reclen;
		}

		/* one iovec per record, plus one for a TX_WRITE's trailing data */
		nr_iovecs++;
		if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
			nr_iovecs++;
	}

	dst_iovecs = kmem_alloc(nr_iovecs * sizeof (iovec_t), KM_SLEEP);

	/*
	 * Copy the plain zil header over and authenticate everything except
	 * the checksum that will store our MAC. If we are writing the data
	 * the embedded checksum will not have been calculated yet, so we don't
	 * authenticate that.
	 */
	bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t));
	aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t);
	aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t);

	slrp = src + sizeof (zil_chain_t);
	dlrp = dst + sizeof (zil_chain_t);

	/*
	 * Loop over records again, filling in iovecs.
	 */

	/* The first iovec will contain the authbuf. */
	vec = 1;

	for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) {
		lr = (lr_t *)slrp;

		if (!byteswap) {
			txtype = lr->lrc_txtype;
			lr_len = lr->lrc_reclen;
		} else {
			txtype = BSWAP_64(lr->lrc_txtype);
			lr_len = BSWAP_64(lr->lrc_reclen);
		}

		/* copy the common lr_t */
		bcopy(slrp, dlrp, sizeof (lr_t));
		bcopy(slrp, aadp, sizeof (lr_t));
		aadp += sizeof (lr_t);
		aad_len += sizeof (lr_t);

		/*
		 * If this is a TX_WRITE record we want to encrypt everything
		 * except the bp if exists. If the bp does exist we want to
		 * authenticate it.
		 */
		if (txtype == TX_WRITE) {
			/* fixed lr_write_t fields between the lr_t and the bp */
			crypt_len = sizeof (lr_write_t) -
			    sizeof (lr_t) - sizeof (blkptr_t);
			dst_iovecs[vec].iov_base = (char *)dlrp +
			    sizeof (lr_t);
			dst_iovecs[vec].iov_len = crypt_len;

			/* copy the bp now since it will not be encrypted */
			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
			    dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
			    sizeof (blkptr_t));
			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
			    aadp, sizeof (blkptr_t));
			aadp += sizeof (blkptr_t);
			aad_len += sizeof (blkptr_t);
			vec++;
			total_len += crypt_len;

			/* anything after the lr_write_t is encrypted as well */
			if (lr_len != sizeof (lr_write_t)) {
				crypt_len = lr_len - sizeof (lr_write_t);
				dst_iovecs[vec].iov_base = (char *)
				    dlrp + sizeof (lr_write_t);
				dst_iovecs[vec].iov_len = crypt_len;
				vec++;
				total_len += crypt_len;
			}
		} else {
			/* every other record: encrypt all but the lr_t header */
			crypt_len = lr_len - sizeof (lr_t);
			dst_iovecs[vec].iov_base = (char *)dlrp +
			    sizeof (lr_t);
			dst_iovecs[vec].iov_len = crypt_len;
			vec++;
			total_len += crypt_len;
		}
	}

	/* The last iovec will contain the MAC. */
	ASSERT3U(vec, ==, nr_iovecs - 1);

	/* AAD */
	dst_iovecs[0].iov_base = aadbuf;
	dst_iovecs[0].iov_len = aad_len;
	/* MAC */
	dst_iovecs[vec].iov_base = 0;
	dst_iovecs[vec].iov_len = 0;

	/* vec never advanced past 1: there was nothing to encrypt */
	*no_crypt = (vec == 1);
	*enc_len = total_len;
	*authbuf = aadbuf;
	*auth_len = aad_len;
	GET_UIO_STRUCT(out_uio)->uio_iov = dst_iovecs;
	zfs_uio_iovcnt(out_uio) = nr_iovecs;

	return (0);
}

/*
 * Special case handling routine for encrypting / decrypting dnode blocks.
*/
static int
zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
    zfs_uio_t *puio, zfs_uio_t *out_uio, uint_t *enc_len, uint8_t **authbuf,
    uint_t *auth_len, boolean_t *no_crypt)
{
	/*
	 * Outputs follow the same layout as the ZIL variant: out_uio gets an
	 * iovec array whose entry 0 is the AAD buffer and whose last entry is
	 * left empty for the MAC; *authbuf / *auth_len describe the AAD and
	 * *enc_len is the total length of the bonus buffers to be encrypted.
	 * puio is not referenced. Always returns 0.
	 */
	uint8_t *aadbuf = zio_buf_alloc(datalen);
	uint8_t *src, *dst, *aadp;
	dnode_phys_t *dnp, *adnp, *sdnp, *ddnp;
	iovec_t *dst_iovecs;
	uint_t nr_iovecs, crypt_len, vec;
	uint_t aad_len = 0, total_len = 0;
	uint_t i, j, max_dnp = datalen >> DNODE_SHIFT;

	if (encrypt) {
		src = plainbuf;
		dst = cipherbuf;
	} else {
		src = cipherbuf;
		dst = plainbuf;
	}
	/* the crypto operates in place, so start dst as a copy of src */
	bcopy(src, dst, datalen);

	sdnp = (dnode_phys_t *)src;
	ddnp = (dnode_phys_t *)dst;
	aadp = aadbuf;

	/*
	 * Count the number of iovecs we will need to do the encryption by
	 * counting the number of bonus buffers that need to be encrypted.
	 */

	/* We need at least two iovecs -- one for the AAD, one for the MAC. */
	nr_iovecs = 2;

	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
		/*
		 * This block may still be byteswapped. However, all of the
		 * values we use are either uint8_t's (for which byteswapping
		 * is a noop) or a != 0 check, which will work regardless
		 * of whether or not we byteswap.
		 */
		if (sdnp[i].dn_type != DMU_OT_NONE &&
		    DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) &&
		    sdnp[i].dn_bonuslen != 0) {
			nr_iovecs++;
		}
	}

	dst_iovecs = kmem_alloc(nr_iovecs * sizeof (iovec_t), KM_SLEEP);

	/*
	 * Iterate through the dnodes again, this time filling in the uios
	 * we allocated earlier. We also concatenate any data we want to
	 * authenticate onto aadbuf.
	 */

	/* The first iovec will contain the authbuf. */
	vec = 1;

	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
		dnp = &sdnp[i];

		/* copy over the core fields and blkptrs (kept as plaintext) */
		bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp);

		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
			bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]),
			    sizeof (blkptr_t));
		}

		/*
		 * Handle authenticated data. We authenticate everything in
		 * the dnode that can be brought over when we do a raw send.
		 * This includes all of the core fields as well as the MACs
		 * stored in the bp checksums and all of the portable bits
		 * from blk_prop. We include the dnode padding here in case it
		 * ever gets used in the future. Some dn_flags and dn_used are
		 * not portable so we mask those values out of the
		 * authenticated data.
		 */
		crypt_len = offsetof(dnode_phys_t, dn_blkptr);
		bcopy(dnp, aadp, crypt_len);
		/* mask the non-portable fields in the AAD copy, not the source */
		adnp = (dnode_phys_t *)aadp;
		adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
		adnp->dn_used = 0;
		aadp += crypt_len;
		aad_len += crypt_len;

		for (j = 0; j < dnp->dn_nblkptr; j++) {
			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
			    version, byteswap, &dnp->dn_blkptr[j]);
		}

		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
			    version, byteswap, DN_SPILL_BLKPTR(dnp));
		}

		/*
		 * If this bonus buffer needs to be encrypted, we prepare an
		 * iovec_t. The encryption / decryption functions will fill
		 * this in for us with the encrypted or decrypted data.
		 * Otherwise we add the bonus buffer to the authenticated
		 * data buffer and copy it over to the destination. The
		 * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that
		 * we can guarantee alignment with the AES block size
		 * (128 bits).
		 */
		crypt_len = DN_MAX_BONUS_LEN(dnp);
		if (dnp->dn_type != DMU_OT_NONE &&
		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
		    dnp->dn_bonuslen != 0) {
			dst_iovecs[vec].iov_base = DN_BONUS(&ddnp[i]);
			dst_iovecs[vec].iov_len = crypt_len;

			vec++;
			total_len += crypt_len;
		} else {
			bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len);
			bcopy(DN_BONUS(dnp), aadp, crypt_len);
			aadp += crypt_len;
			aad_len += crypt_len;
		}
	}

	/* The last iovec will contain the MAC. */
	ASSERT3U(vec, ==, nr_iovecs - 1);

	/* AAD */
	dst_iovecs[0].iov_base = aadbuf;
	dst_iovecs[0].iov_len = aad_len;
	/* MAC */
	dst_iovecs[vec].iov_base = 0;
	dst_iovecs[vec].iov_len = 0;

	/* vec never advanced past 1: no bonus buffer needed encryption */
	*no_crypt = (vec == 1);
	*enc_len = total_len;
	*authbuf = aadbuf;
	*auth_len = aad_len;
	GET_UIO_STRUCT(out_uio)->uio_iov = dst_iovecs;
	zfs_uio_iovcnt(out_uio) = nr_iovecs;

	return (0);
}

/*
 * Default handler for blocks that are encrypted as a whole. The source
 * buffer is copied to the destination and a two-entry iovec array is attached
 * to out_uio: entry 0 covers the whole destination buffer, entry 1 is left
 * zeroed (the caller, zio_crypt_init_uios(), points the last entry at the
 * MAC). *enc_len is set to datalen. puio is unused.
 *
 * NOTE(review): plain_iovecs is never assigned, so its cleanup in the error
 * path can never run; whether the allocation failure branch itself is
 * reachable depends on KM_SLEEP semantics -- confirm before removing.
 */
static int
zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
    uint8_t *cipherbuf, uint_t datalen, zfs_uio_t *puio, zfs_uio_t *out_uio,
    uint_t *enc_len)
{
	(void) puio;
	int ret;
	uint_t nr_plain = 1, nr_cipher = 2;
	iovec_t *plain_iovecs = NULL, *cipher_iovecs = NULL;
	void *src, *dst;

	cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t),
	    KM_SLEEP);
	if (!cipher_iovecs) {
		ret = SET_ERROR(ENOMEM);
		goto error;
	}
	bzero(cipher_iovecs, nr_cipher * sizeof (iovec_t));

	if (encrypt) {
		src = plainbuf;
		dst = cipherbuf;
	} else {
		src = cipherbuf;
		dst = plainbuf;
	}
	/* the crypto operates in place, so start dst as a copy of src */
	bcopy(src, dst, datalen);
	cipher_iovecs[0].iov_base = dst;
	cipher_iovecs[0].iov_len = datalen;

	*enc_len = datalen;
	GET_UIO_STRUCT(out_uio)->uio_iov = cipher_iovecs;
	zfs_uio_iovcnt(out_uio) = nr_cipher;

	return (0);

error:
	if (plain_iovecs != NULL)
		kmem_free(plain_iovecs, nr_plain * sizeof (iovec_t));
	if (cipher_iovecs != NULL)
		kmem_free(cipher_iovecs, nr_cipher * sizeof (iovec_t));

	*enc_len = 0;
	GET_UIO_STRUCT(out_uio)->uio_iov = NULL;
	zfs_uio_iovcnt(out_uio) = 0;

	return (ret);
}

/*
 * This function builds up the plaintext (puio) and ciphertext (cuio) uios so
 * that they can be used for encryption and decryption by zio_do_crypt_uio().
 * Most blocks will use zio_crypt_init_uios_normal(), with ZIL and dnode blocks
 * requiring special handling to parse out pieces that are to be encrypted. The
 * authbuf is used by these special cases to store additional authenticated
 * data (AAD) for the encryption modes.
*/
static int
zio_crypt_init_uios(boolean_t encrypt, uint64_t version,
    dmu_object_type_t ot, uint8_t *plainbuf, uint8_t *cipherbuf,
    uint_t datalen, boolean_t byteswap, uint8_t *mac, zfs_uio_t *puio,
    zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
    boolean_t *no_crypt)
{
	/*
	 * On success (return 0) cuio carries the iovec array built by the
	 * selected handler, with its last entry pointed at the caller's MAC
	 * buffer. On failure the handler's error is returned unchanged.
	 */
	int ret;
	iovec_t *mac_iov;

	ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE);

	/* route to handler */
	switch (ot) {
	case DMU_OT_INTENT_LOG:
		ret = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf,
		    datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len,
		    no_crypt);
		break;
	case DMU_OT_DNODE:
		ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf,
		    cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf,
		    auth_len, no_crypt);
		break;
	default:
		/* whole-block encryption: no AAD, and crypto always happens */
		ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf,
		    datalen, puio, cuio, enc_len);
		*authbuf = NULL;
		*auth_len = 0;
		*no_crypt = B_FALSE;
		break;
	}

	if (ret != 0)
		goto error;

	/* populate the uios */
	zfs_uio_segflg(cuio) = UIO_SYSSPACE;

	/* every handler reserves the final iovec for the MAC */
	mac_iov =
	    ((iovec_t *)&(GET_UIO_STRUCT(cuio)->
	    uio_iov[zfs_uio_iovcnt(cuio) - 1]));
	mac_iov->iov_base = (void *)mac;
	mac_iov->iov_len = ZIO_DATA_MAC_LEN;

	return (0);

error:
	return (ret);
}

/*
 * Debugging aid: zio_do_crypt_data() stores a copy of the ciphertext of the
 * most recent failed decryption here, together with its size in bytes.
 */
void *failed_decrypt_buf;
int failed_decrypt_size;

/*
 * Primary encryption / decryption entrypoint for zio data.
*/
int
zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
    dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv,
    uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf,
    boolean_t *no_crypt)
{
	/*
	 * Returns 0 on success. *no_crypt is set by zio_crypt_init_uios().
	 * On a failed decryption the ciphertext is saved in the
	 * failed_decrypt_buf / failed_decrypt_size globals before returning
	 * the error.
	 */
	int ret;
	boolean_t locked = B_FALSE;
	uint64_t crypt = key->zk_crypt;
	uint_t keydata_len = zio_crypt_table[crypt].ci_keylen;
	uint_t enc_len, auth_len;
	zfs_uio_t puio, cuio;
	struct uio puio_s, cuio_s;
	uint8_t enc_keydata[MASTER_KEY_MAX_LEN];
	crypto_key_t tmp_ckey, *ckey = NULL;
	freebsd_crypt_session_t *tmpl = NULL;
	uint8_t *authbuf = NULL;

	zfs_uio_init(&puio, &puio_s);
	zfs_uio_init(&cuio, &cuio_s);
	/* zeroed so zio_crypt_destroy_uio() is safe on an unused uio */
	bzero(GET_UIO_STRUCT(&puio), sizeof (struct uio));
	bzero(GET_UIO_STRUCT(&cuio), sizeof (struct uio));

#ifdef FCRYPTO_DEBUG
	printf("%s(%s, %p, %p, %d, %p, %p, %u, %s, %p, %p, %p)\n",
	    __FUNCTION__,
	    encrypt ? "encrypt" : "decrypt",
	    key, salt, ot, iv, mac, datalen,
	    byteswap ? "byteswap" : "native_endian", plainbuf,
	    cipherbuf, no_crypt);

	printf("\tkey = {");
	for (int i = 0; i < key->zk_current_key.ck_length/8; i++)
		printf("%02x ", ((uint8_t *)key->zk_current_key.ck_data)[i]);
	printf("}\n");
#endif
	/* create uios for encryption */
	ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
	    cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
	    &authbuf, &auth_len, no_crypt);
	if (ret != 0)
		return (ret);

	/*
	 * If the needed key is the current one, just use it. Otherwise we
	 * need to generate a temporary one from the given salt + master key.
	 * If we are encrypting, we must return a copy of the current salt
	 * so that it can be stored in the blkptr_t.
	 */
	rw_enter(&key->zk_salt_lock, RW_READER);
	locked = B_TRUE;

	if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) {
		/* the read lock stays held while the cached key is in use */
		ckey = &key->zk_current_key;
		tmpl = &key->zk_session;
	} else {
		/* a private key is derived below, so the lock can be dropped */
		rw_exit(&key->zk_salt_lock);
		locked = B_FALSE;

		ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
		    salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len);
		if (ret != 0)
			goto error;
		tmp_ckey.ck_format = CRYPTO_KEY_RAW;
		tmp_ckey.ck_data = enc_keydata;
		tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len);

		ckey = &tmp_ckey;
		tmpl = NULL;
	}

	/* perform the encryption / decryption */
	ret = zio_do_crypt_uio_opencrypto(encrypt, tmpl, key->zk_crypt,
	    ckey, iv, enc_len, &cuio, auth_len);
	if (ret != 0)
		goto error;
	if (locked) {
		rw_exit(&key->zk_salt_lock);
		locked = B_FALSE;
	}

	if (authbuf != NULL)
		zio_buf_free(authbuf, datalen);
	/* scrub the temporary derived key material */
	if (ckey == &tmp_ckey)
		bzero(enc_keydata, keydata_len);
	zio_crypt_destroy_uio(&puio);
	zio_crypt_destroy_uio(&cuio);

	return (0);

error:
	if (!encrypt) {
		/* keep only the most recent failed ciphertext for debugging */
		if (failed_decrypt_buf != NULL)
			kmem_free(failed_decrypt_buf, failed_decrypt_size);
		failed_decrypt_buf = kmem_alloc(datalen, KM_SLEEP);
		failed_decrypt_size = datalen;
		bcopy(cipherbuf, failed_decrypt_buf, datalen);
	}
	if (locked)
		rw_exit(&key->zk_salt_lock);
	if (authbuf != NULL)
		zio_buf_free(authbuf, datalen);
	/*
	 * NOTE(review): when hkdf_sha512() itself fails, ckey is still NULL
	 * here, so enc_keydata is not scrubbed on that path.
	 */
	if (ckey == &tmp_ckey)
		bzero(enc_keydata, keydata_len);
	zio_crypt_destroy_uio(&puio);
	zio_crypt_destroy_uio(&cuio);
	return (SET_ERROR(ret));
}

/*
 * Simple wrapper around zio_do_crypt_data() to work with abd's instead of
 * linear buffers.
*/ int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) { int ret; void *ptmp, *ctmp; if (encrypt) { ptmp = abd_borrow_buf_copy(pabd, datalen); ctmp = abd_borrow_buf(cabd, datalen); } else { ptmp = abd_borrow_buf(pabd, datalen); ctmp = abd_borrow_buf_copy(cabd, datalen); } ret = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac, datalen, ptmp, ctmp, no_crypt); if (ret != 0) goto error; if (encrypt) { abd_return_buf(pabd, ptmp, datalen); abd_return_buf_copy(cabd, ctmp, datalen); } else { abd_return_buf_copy(pabd, ptmp, datalen); abd_return_buf(cabd, ctmp, datalen); } return (0); error: if (encrypt) { abd_return_buf(pabd, ptmp, datalen); abd_return_buf_copy(cabd, ctmp, datalen); } else { abd_return_buf_copy(pabd, ptmp, datalen); abd_return_buf(cabd, ctmp, datalen); } return (SET_ERROR(ret)); } #if defined(_KERNEL) && defined(HAVE_SPL) /* BEGIN CSTYLED */ module_param(zfs_key_max_salt_uses, ulong, 0644); MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value " "can be used for generating encryption keys before it is rotated"); /* END CSTYLED */ #endif diff --git a/module/os/linux/zfs/zio_crypt.c b/module/os/linux/zfs/zio_crypt.c index 9f8b9f53e965..a979f7e20c1b 100644 --- a/module/os/linux/zfs/zio_crypt.c +++ b/module/os/linux/zfs/zio_crypt.c @@ -1,2044 +1,2059 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. * * CDDL HEADER END */ /* * Copyright (c) 2017, Datto, Inc. All rights reserved. 
*/ #include #include #include #include #include #include #include #include #include #include /* * This file is responsible for handling all of the details of generating * encryption parameters and performing encryption and authentication. * * BLOCK ENCRYPTION PARAMETERS: * Encryption /Authentication Algorithm Suite (crypt): * The encryption algorithm, mode, and key length we are going to use. We * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit * keys. All authentication is currently done with SHA512-HMAC. * * Plaintext: * The unencrypted data that we want to encrypt. * * Initialization Vector (IV): * An initialization vector for the encryption algorithms. This is used to * "tweak" the encryption algorithms so that two blocks of the same data are * encrypted into different ciphertext outputs, thus obfuscating block patterns. * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is * never reused with the same encryption key. This value is stored unencrypted * and must simply be provided to the decryption function. We use a 96 bit IV * (as recommended by NIST) for all block encryption. For non-dedup blocks we * derive the IV randomly. The first 64 bits of the IV are stored in the second * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of * level 0 blocks is the number of allocated dnodes in that block. The on-disk * format supports at most 2^15 slots per L0 dnode block, because the maximum * block size is 16MB (2^24). In either case, for level 0 blocks this number * will still be smaller than UINT32_MAX so it is safe to store the IV in the * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count * for the dnode code. 
* * Master key: * This is the most important secret data of an encrypted dataset. It is used * along with the salt to generate the actual encryption keys via HKDF. We * do not use the master key to directly encrypt any data because there are * theoretical limits on how much data can actually be safely encrypted with * any encryption mode. The master key is stored encrypted on disk with the * user's wrapping key. Its length is determined by the encryption algorithm. * For details on how this is stored see the block comment in dsl_crypt.c * * Salt: * Used as an input to the HKDF function, along with the master key. We use a * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt * can be used for encrypting many blocks, so we cache the current salt and the * associated derived key in zio_crypt_t so we do not need to derive it again * needlessly. * * Encryption Key: * A secret binary key, generated from an HKDF function used to encrypt and * decrypt data. * * Message Authentication Code (MAC) * The MAC is an output of authenticated encryption modes such as AES-GCM and * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted * data on disk and return garbage to the application. Effectively, it is a * checksum that can not be reproduced by an attacker. We store the MAC in the * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated * regular checksum of the ciphertext which can be used for scrubbing. * * OBJECT AUTHENTICATION: * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because * they contain some info that always needs to be readable. To prevent this * data from being altered, we authenticate this data using SHA512-HMAC. This * will produce a MAC (similar to the one produced via encryption) which can * be used to verify the object was not modified. HMACs do not require key * rotation or IVs, so we can keep up to the full 3 copies of authenticated * data. 
* * ZIL ENCRYPTION: * ZIL blocks have their bp written to disk ahead of the associated data, so we * cannot store the MAC there as we normally do. For these blocks the MAC is * stored in the embedded checksum within the zil_chain_t header. The salt and * IV are generated for the block on bp allocation instead of at encryption * time. In addition, ZIL blocks have some pieces that must be left in plaintext * for claiming even though all of the sensitive user data still needs to be * encrypted. The function zio_crypt_init_uios_zil() handles parsing which * pieces of the block need to be encrypted. All data that is not encrypted is * authenticated using the AAD mechanisms that the supported encryption modes * provide for. In order to preserve the semantics of the ZIL for encrypted * datasets, the ZIL is not protected at the objset level as described below. * * DNODE ENCRYPTION: * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left * in plaintext for scrubbing and claiming, but the bonus buffers might contain * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing * which pieces of the block need to be encrypted. For more details about * dnode authentication and encryption, see zio_crypt_init_uios_dnode(). * * OBJECT SET AUTHENTICATION: * Up to this point, everything we have encrypted and authenticated has been * at level 0 (or -2 for the ZIL). If we did not do any further work the * on-disk format would be susceptible to attacks that deleted or rearranged * the order of level 0 blocks. Ideally, the cleanest solution would be to * maintain a tree of authentication MACs going up the bp tree. However, this * presents a problem for raw sends. Send files do not send information about * indirect blocks so there would be no convenient way to transfer the MACs and * they cannot be recalculated on the receive side without the master key which * would defeat one of the purposes of raw sends in the first place. 
Instead, * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs * from the level below. We also include some portable fields from blk_prop such * as the lsize and compression algorithm to prevent the data from being * misinterpreted. * * At the objset level, we maintain 2 separate 256 bit MACs in the * objset_phys_t. The first one is "portable" and is the logical root of the * MAC tree maintained in the metadnode's bps. The second, is "local" and is * used as the root MAC for the user accounting objects, which are also not * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload * of the send file. The useraccounting code ensures that the useraccounting * info is not present upon a receive, so the local MAC can simply be cleared * out at that time. For more info about objset_phys_t authentication, see * zio_crypt_do_objset_hmacs(). * * CONSIDERATIONS FOR DEDUP: * In order for dedup to work, blocks that we want to dedup with one another * need to use the same IV and encryption key, so that they will have the same * ciphertext. Normally, one should never reuse an IV with the same encryption * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both * blocks. In this case, however, since we are using the same plaintext as * well all that we end up with is a duplicate of the original ciphertext we * already had. As a result, an attacker with read access to the raw disk will * be able to tell which blocks are the same but this information is given away * by dedup anyway. In order to get the same IVs and encryption keys for * equivalent blocks of data we use an HMAC of the plaintext. We use an HMAC * here so that a reproducible checksum of the plaintext is never available to * the attacker. The HMAC key is kept alongside the master key, encrypted on * disk. The first 64 bits of the HMAC are used in place of the random salt, and * the next 96 bits are used as the IV. 
As a result of this mechanism, dedup * will only work within a clone family since encrypted dedup requires use of * the same master and HMAC keys. */ /* * After encrypting many blocks with the same key we may start to run up * against the theoretical limits of how much data can securely be encrypted * with a single key using the supported encryption modes. The most obvious * limitation is that our risk of generating 2 equivalent 96 bit IVs increases * the more IVs we generate (which both GCM and CCM modes strictly forbid). * This risk actually grows surprisingly quickly over time according to the * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have * generated n IVs with a cryptographically secure RNG, the approximate * probability p(n) of a collision is given as: * * p(n) ~= 1 - e^(-n*(n-1)/(2*(2^96))) * * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html] * * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion * we must not write more than 398,065,730 blocks with the same encryption key. * Therefore, we rotate our keys after 400,000,000 blocks have been written by * generating a new random 64 bit salt for our HKDF encryption key generation * function. 
*/ #define ZFS_KEY_MAX_SALT_USES_DEFAULT 400000000 #define ZFS_CURRENT_MAX_SALT_USES \ (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT)) static unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT; typedef struct blkptr_auth_buf { uint64_t bab_prop; /* blk_prop - portable mask */ uint8_t bab_mac[ZIO_DATA_MAC_LEN]; /* MAC from blk_cksum */ uint64_t bab_pad; /* reserved for future use */ } blkptr_auth_buf_t; const zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { {"", ZC_TYPE_NONE, 0, "inherit"}, {"", ZC_TYPE_NONE, 0, "on"}, {"", ZC_TYPE_NONE, 0, "off"}, {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 16, "aes-128-ccm"}, {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 24, "aes-192-ccm"}, {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 32, "aes-256-ccm"}, {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 16, "aes-128-gcm"}, {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 24, "aes-192-gcm"}, {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 32, "aes-256-gcm"} }; void zio_crypt_key_destroy(zio_crypt_key_t *key) { rw_destroy(&key->zk_salt_lock); /* free crypto templates */ crypto_destroy_ctx_template(key->zk_current_tmpl); crypto_destroy_ctx_template(key->zk_hmac_tmpl); /* zero out sensitive data */ bzero(key, sizeof (zio_crypt_key_t)); } int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) { int ret; crypto_mechanism_t mech; uint_t keydata_len; ASSERT(key != NULL); ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); keydata_len = zio_crypt_table[crypt].ci_keylen; bzero(key, sizeof (zio_crypt_key_t)); /* fill keydata buffers and salt with random data */ ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t)); if (ret != 0) goto error; ret = random_get_bytes(key->zk_master_keydata, keydata_len); if (ret != 0) goto error; ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN); if (ret != 0) goto error; ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); if (ret != 0) goto error; /* derive the current key from the master key */ ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, key->zk_salt, 
ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); if (ret != 0) goto error; /* initialize keys for the ICP */ key->zk_current_key.ck_format = CRYPTO_KEY_RAW; key->zk_current_key.ck_data = key->zk_current_keydata; key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; key->zk_hmac_key.ck_data = &key->zk_hmac_key; key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); /* * Initialize the crypto templates. It's ok if this fails because * this is just an optimization. */ mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); ret = crypto_create_ctx_template(&mech, &key->zk_current_key, &key->zk_current_tmpl, KM_SLEEP); if (ret != CRYPTO_SUCCESS) key->zk_current_tmpl = NULL; mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, &key->zk_hmac_tmpl, KM_SLEEP); if (ret != CRYPTO_SUCCESS) key->zk_hmac_tmpl = NULL; key->zk_crypt = crypt; key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION; key->zk_salt_count = 0; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); return (0); error: zio_crypt_key_destroy(key); return (ret); } static int zio_crypt_key_change_salt(zio_crypt_key_t *key) { int ret = 0; uint8_t salt[ZIO_DATA_SALT_LEN]; crypto_mechanism_t mech; uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen; /* generate a new salt */ ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN); if (ret != 0) goto error; rw_enter(&key->zk_salt_lock, RW_WRITER); /* someone beat us to the salt rotation, just unlock and return */ if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES) goto out_unlock; /* derive the current key from the master key and the new salt */ ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); if (ret != 0) goto out_unlock; /* assign the salt and reset the usage count */ bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN); key->zk_salt_count = 0; /* destroy 
the old context template and create the new one */ crypto_destroy_ctx_template(key->zk_current_tmpl); ret = crypto_create_ctx_template(&mech, &key->zk_current_key, &key->zk_current_tmpl, KM_SLEEP); if (ret != CRYPTO_SUCCESS) key->zk_current_tmpl = NULL; rw_exit(&key->zk_salt_lock); return (0); out_unlock: rw_exit(&key->zk_salt_lock); error: return (ret); } /* See comment above zfs_key_max_salt_uses definition for details */ int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt) { int ret; boolean_t salt_change; rw_enter(&key->zk_salt_lock, RW_READER); bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN); salt_change = (atomic_inc_64_nv(&key->zk_salt_count) >= ZFS_CURRENT_MAX_SALT_USES); rw_exit(&key->zk_salt_lock); if (salt_change) { ret = zio_crypt_key_change_salt(key); if (ret != 0) goto error; } return (0); error: return (ret); } /* * This function handles all encryption and decryption in zfs. When * encrypting it expects puio to reference the plaintext and cuio to * reference the ciphertext. cuio must have enough space for the * ciphertext + room for a MAC. datalen should be the length of the * plaintext / ciphertext alone. 
*/ static int zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen, zfs_uio_t *puio, zfs_uio_t *cuio, uint8_t *authbuf, uint_t auth_len) { int ret; crypto_data_t plaindata, cipherdata; CK_AES_CCM_PARAMS ccmp; CK_AES_GCM_PARAMS gcmp; crypto_mechanism_t mech; zio_crypt_info_t crypt_info; uint_t plain_full_len, maclen; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW); /* lookup the encryption info */ crypt_info = zio_crypt_table[crypt]; /* the mac will always be the last iovec_t in the cipher uio */ maclen = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len; ASSERT(maclen <= ZIO_DATA_MAC_LEN); /* setup encryption mechanism (same as crypt) */ mech.cm_type = crypto_mech2id(crypt_info.ci_mechname); /* * Strangely, the ICP requires that plain_full_len must include * the MAC length when decrypting, even though the UIO does not * need to have the extra space allocated. */ if (encrypt) { plain_full_len = datalen; } else { plain_full_len = datalen + maclen; } /* * setup encryption params (currently only AES CCM and AES GCM * are supported) */ if (crypt_info.ci_crypt_type == ZC_TYPE_CCM) { ccmp.ulNonceSize = ZIO_DATA_IV_LEN; ccmp.ulAuthDataSize = auth_len; ccmp.authData = authbuf; ccmp.ulMACSize = maclen; ccmp.nonce = ivbuf; ccmp.ulDataSize = plain_full_len; mech.cm_param = (char *)(&ccmp); mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS); } else { gcmp.ulIvLen = ZIO_DATA_IV_LEN; gcmp.ulIvBits = CRYPTO_BYTES2BITS(ZIO_DATA_IV_LEN); gcmp.ulAADLen = auth_len; gcmp.pAAD = authbuf; gcmp.ulTagBits = CRYPTO_BYTES2BITS(maclen); gcmp.pIv = ivbuf; mech.cm_param = (char *)(&gcmp); mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); } /* populate the cipher and plain data structs. 
*/ plaindata.cd_format = CRYPTO_DATA_UIO; plaindata.cd_offset = 0; plaindata.cd_uio = puio; plaindata.cd_miscdata = NULL; plaindata.cd_length = plain_full_len; cipherdata.cd_format = CRYPTO_DATA_UIO; cipherdata.cd_offset = 0; cipherdata.cd_uio = cuio; cipherdata.cd_miscdata = NULL; cipherdata.cd_length = datalen + maclen; /* perform the actual encryption */ if (encrypt) { ret = crypto_encrypt(&mech, &plaindata, key, tmpl, &cipherdata, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } } else { ret = crypto_decrypt(&mech, &cipherdata, key, tmpl, &plaindata, NULL); if (ret != CRYPTO_SUCCESS) { ASSERT3U(ret, ==, CRYPTO_INVALID_MAC); ret = SET_ERROR(ECKSUM); goto error; } } return (0); error: return (ret); } int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) { int ret; zfs_uio_t puio, cuio; uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint64_t crypt = key->zk_crypt; uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); keydata_len = zio_crypt_table[crypt].ci_keylen; /* generate iv for wrapping the master and hmac key */ ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN); if (ret != 0) goto error; /* initialize zfs_uio_ts */ plain_iovecs[0].iov_base = key->zk_master_keydata; plain_iovecs[0].iov_len = keydata_len; plain_iovecs[1].iov_base = key->zk_hmac_keydata; plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; cipher_iovecs[0].iov_base = keydata_out; cipher_iovecs[0].iov_len = keydata_len; cipher_iovecs[1].iov_base = hmac_keydata_out; cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; cipher_iovecs[2].iov_base = mac; cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; /* * Although we don't support writing to the old format, we do * support rewrapping the key so that the user can move and * quarantine datasets on the old format. 
*/ if (key->zk_version == 0) { aad_len = sizeof (uint64_t); aad[0] = LE_64(key->zk_guid); } else { ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); aad_len = sizeof (uint64_t) * 3; aad[0] = LE_64(key->zk_guid); aad[1] = LE_64(crypt); aad[2] = LE_64(key->zk_version); } enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN; puio.uio_iov = plain_iovecs; puio.uio_iovcnt = 2; puio.uio_segflg = UIO_SYSSPACE; cuio.uio_iov = cipher_iovecs; cuio.uio_iovcnt = 3; cuio.uio_segflg = UIO_SYSSPACE; /* encrypt the keys and store the resulting ciphertext and mac */ ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; return (0); error: return (ret); } int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, zio_crypt_key_t *key) { crypto_mechanism_t mech; zfs_uio_t puio, cuio; uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint_t enc_len, keydata_len, aad_len; int ret; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); keydata_len = zio_crypt_table[crypt].ci_keylen; /* initialize zfs_uio_ts */ plain_iovecs[0].iov_base = key->zk_master_keydata; plain_iovecs[0].iov_len = keydata_len; plain_iovecs[1].iov_base = key->zk_hmac_keydata; plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; cipher_iovecs[0].iov_base = keydata; cipher_iovecs[0].iov_len = keydata_len; cipher_iovecs[1].iov_base = hmac_keydata; cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; cipher_iovecs[2].iov_base = mac; cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; if (version == 0) { aad_len = sizeof (uint64_t); aad[0] = LE_64(guid); } else { ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); aad_len = sizeof (uint64_t) * 3; aad[0] = LE_64(guid); aad[1] = LE_64(crypt); aad[2] = LE_64(version); } enc_len = keydata_len + 
SHA512_HMAC_KEYLEN; puio.uio_iov = plain_iovecs; puio.uio_segflg = UIO_SYSSPACE; puio.uio_iovcnt = 2; cuio.uio_iov = cipher_iovecs; cuio.uio_iovcnt = 3; cuio.uio_segflg = UIO_SYSSPACE; /* decrypt the keys and store the result in the output buffers */ ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; /* generate a fresh salt */ ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); if (ret != 0) goto error; /* derive the current key from the master key */ ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); if (ret != 0) goto error; /* initialize keys for ICP */ key->zk_current_key.ck_format = CRYPTO_KEY_RAW; key->zk_current_key.ck_data = key->zk_current_keydata; key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; key->zk_hmac_key.ck_data = key->zk_hmac_keydata; key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); /* * Initialize the crypto templates. It's ok if this fails because * this is just an optimization. 
*/ mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); ret = crypto_create_ctx_template(&mech, &key->zk_current_key, &key->zk_current_tmpl, KM_SLEEP); if (ret != CRYPTO_SUCCESS) key->zk_current_tmpl = NULL; mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, &key->zk_hmac_tmpl, KM_SLEEP); if (ret != CRYPTO_SUCCESS) key->zk_hmac_tmpl = NULL; key->zk_crypt = crypt; key->zk_version = version; key->zk_guid = guid; key->zk_salt_count = 0; return (0); error: zio_crypt_key_destroy(key); return (ret); } int zio_crypt_generate_iv(uint8_t *ivbuf) { int ret; /* randomly generate the IV */ ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN); if (ret != 0) goto error; return (0); error: bzero(ivbuf, ZIO_DATA_IV_LEN); return (ret); } int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, uint8_t *digestbuf, uint_t digestlen) { int ret; crypto_mechanism_t mech; crypto_data_t in_data, digest_data; uint8_t raw_digestbuf[SHA512_DIGEST_LENGTH]; ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH); /* initialize sha512-hmac mechanism and crypto data */ mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); mech.cm_param = NULL; mech.cm_param_len = 0; /* initialize the crypto data */ in_data.cd_format = CRYPTO_DATA_RAW; in_data.cd_offset = 0; in_data.cd_length = datalen; in_data.cd_raw.iov_base = (char *)data; in_data.cd_raw.iov_len = in_data.cd_length; digest_data.cd_format = CRYPTO_DATA_RAW; digest_data.cd_offset = 0; digest_data.cd_length = SHA512_DIGEST_LENGTH; digest_data.cd_raw.iov_base = (char *)raw_digestbuf; digest_data.cd_raw.iov_len = digest_data.cd_length; /* generate the hmac */ ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl, &digest_data, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } bcopy(raw_digestbuf, digestbuf, digestlen); return (0); error: bzero(digestbuf, digestlen); return (ret); } int 
zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, uint8_t *ivbuf, uint8_t *salt) { int ret; uint8_t digestbuf[SHA512_DIGEST_LENGTH]; ret = zio_crypt_do_hmac(key, data, datalen, digestbuf, SHA512_DIGEST_LENGTH); if (ret != 0) return (ret); bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN); bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN); return (0); } /* * The following functions are used to encode and decode encryption parameters * into blkptr_t and zil_header_t. The ICP wants to use these parameters as * byte strings, which normally means that these strings would not need to deal * with byteswapping at all. However, both blkptr_t and zil_header_t may be * byteswapped by lower layers and so we must "undo" that byteswap here upon * decoding and encoding in a non-native byteorder. These functions require * that the byteorder bit is correct before being called. */ void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) { uint64_t val64; uint32_t val32; ASSERT(BP_IS_ENCRYPTED(bp)); if (!BP_SHOULD_BYTESWAP(bp)) { bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); BP_SET_IV2(bp, val32); } else { bcopy(salt, &val64, sizeof (uint64_t)); bp->blk_dva[2].dva_word[0] = BSWAP_64(val64); bcopy(iv, &val64, sizeof (uint64_t)); bp->blk_dva[2].dva_word[1] = BSWAP_64(val64); bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); BP_SET_IV2(bp, BSWAP_32(val32)); } } void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) { uint64_t val64; uint32_t val32; ASSERT(BP_IS_PROTECTED(bp)); /* for convenience, so callers don't need to check */ if (BP_IS_AUTHENTICATED(bp)) { bzero(salt, ZIO_DATA_SALT_LEN); bzero(iv, ZIO_DATA_IV_LEN); return; } if (!BP_SHOULD_BYTESWAP(bp)) { bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t)); bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof 
(uint64_t)); val32 = (uint32_t)BP_GET_IV2(bp); bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); } else { val64 = BSWAP_64(bp->blk_dva[2].dva_word[0]); bcopy(&val64, salt, sizeof (uint64_t)); val64 = BSWAP_64(bp->blk_dva[2].dva_word[1]); bcopy(&val64, iv, sizeof (uint64_t)); val32 = BSWAP_32((uint32_t)BP_GET_IV2(bp)); bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); } } void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) { uint64_t val64; ASSERT(BP_USES_CRYPT(bp)); ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET); if (!BP_SHOULD_BYTESWAP(bp)) { bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], sizeof (uint64_t)); } else { bcopy(mac, &val64, sizeof (uint64_t)); bp->blk_cksum.zc_word[2] = BSWAP_64(val64); bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t)); bp->blk_cksum.zc_word[3] = BSWAP_64(val64); } } void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac) { uint64_t val64; ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp)); /* for convenience, so callers don't need to check */ if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { bzero(mac, ZIO_DATA_MAC_LEN); return; } if (!BP_SHOULD_BYTESWAP(bp)) { bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t)); bcopy(&bp->blk_cksum.zc_word[3], mac + sizeof (uint64_t), sizeof (uint64_t)); } else { val64 = BSWAP_64(bp->blk_cksum.zc_word[2]); bcopy(&val64, mac, sizeof (uint64_t)); val64 = BSWAP_64(bp->blk_cksum.zc_word[3]); bcopy(&val64, mac + sizeof (uint64_t), sizeof (uint64_t)); } } void zio_crypt_encode_mac_zil(void *data, uint8_t *mac) { zil_chain_t *zilc = data; bcopy(mac, &zilc->zc_eck.zec_cksum.zc_word[2], sizeof (uint64_t)); bcopy(mac + sizeof (uint64_t), &zilc->zc_eck.zec_cksum.zc_word[3], sizeof (uint64_t)); } void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac) { /* * The ZIL MAC is embedded in the block it protects, which will * not have been byteswapped by the time this function has been called. 
* As a result, we don't need to worry about byteswapping the MAC. */ const zil_chain_t *zilc = data; bcopy(&zilc->zc_eck.zec_cksum.zc_word[2], mac, sizeof (uint64_t)); bcopy(&zilc->zc_eck.zec_cksum.zc_word[3], mac + sizeof (uint64_t), sizeof (uint64_t)); } /* * This routine takes a block of dnodes (src_abd) and copies only the bonus * buffers to the same offsets in the dst buffer. datalen should be the size * of both the src_abd and the dst buffer (not just the length of the bonus * buffers). */ void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) { uint_t i, max_dnp = datalen >> DNODE_SHIFT; uint8_t *src; dnode_phys_t *dnp, *sdnp, *ddnp; src = abd_borrow_buf_copy(src_abd, datalen); sdnp = (dnode_phys_t *)src; ddnp = (dnode_phys_t *)dst; for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { dnp = &sdnp[i]; if (dnp->dn_type != DMU_OT_NONE && DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && dnp->dn_bonuslen != 0) { bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), DN_MAX_BONUS_LEN(dnp)); } } abd_return_buf(src_abd, src, datalen); } /* * This function decides what fields from blk_prop are included in * the on-disk various MAC algorithms. */ static void zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) { /* * Version 0 did not properly zero out all non-portable fields * as it should have done. We maintain this code so that we can * do read-only imports of pools on this version. */ if (version == 0) { BP_SET_DEDUP(bp, 0); BP_SET_CHECKSUM(bp, 0); BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); return; } ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); /* * The hole_birth feature might set these fields even if this bp * is a hole. We zero them out here to guarantee that raw sends * will function with or without the feature. */ if (BP_IS_HOLE(bp)) { bp->blk_prop = 0ULL; return; } /* * At L0 we want to verify these fields to ensure that data blocks * can not be reinterpreted. 
For instance, we do not want an attacker * to trick us into returning raw lz4 compressed data to the user * by modifying the compression bits. At higher levels, we cannot * enforce this policy since raw sends do not convey any information * about indirect blocks, so these values might be different on the * receive side. Fortunately, this does not open any new attack * vectors, since any alterations that can be made to a higher level * bp must still verify the correct order of the layer below it. */ if (BP_GET_LEVEL(bp) != 0) { BP_SET_BYTEORDER(bp, 0); BP_SET_COMPRESS(bp, 0); /* * psize cannot be set to zero or it will trigger * asserts, but the value doesn't really matter as * long as it is constant. */ BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); } BP_SET_DEDUP(bp, 0); BP_SET_CHECKSUM(bp, 0); } static void zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp, blkptr_auth_buf_t *bab, uint_t *bab_len) { blkptr_t tmpbp = *bp; if (should_bswap) byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); ASSERT0(BP_IS_EMBEDDED(&tmpbp)); zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac); /* * We always MAC blk_prop in LE to ensure portability. This * must be done after decoding the mac, since the endianness * will get zero'd out here. 
*/ zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version); bab->bab_prop = LE_64(tmpbp.blk_prop); bab->bab_pad = 0ULL; /* version 0 did not include the padding */ *bab_len = sizeof (blkptr_auth_buf_t); if (version == 0) *bab_len -= sizeof (uint64_t); } static int zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version, boolean_t should_bswap, blkptr_t *bp) { int ret; uint_t bab_len; blkptr_auth_buf_t bab; crypto_data_t cd; zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); cd.cd_format = CRYPTO_DATA_RAW; cd.cd_offset = 0; cd.cd_length = bab_len; cd.cd_raw.iov_base = (char *)&bab; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_update(ctx, &cd, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } return (0); error: return (ret); } static void zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version, boolean_t should_bswap, blkptr_t *bp) { uint_t bab_len; blkptr_auth_buf_t bab; zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); SHA2Update(ctx, &bab, bab_len); } static void zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version, boolean_t should_bswap, blkptr_t *bp) { uint_t bab_len; blkptr_auth_buf_t bab; zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); bcopy(&bab, *aadp, bab_len); *aadp += bab_len; *aad_len += bab_len; } static int zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version, boolean_t should_bswap, dnode_phys_t *dnp) { int ret, i; dnode_phys_t *adnp, tmp_dncore; size_t dn_core_size = offsetof(dnode_phys_t, dn_blkptr); boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); crypto_data_t cd; cd.cd_format = CRYPTO_DATA_RAW; cd.cd_offset = 0; /* * Authenticate the core dnode (masking out non-portable bits). * We only copy the first 64 bytes we operate on to avoid the overhead * of copying 512-64 unneeded bytes. The compiler seems to be fine * with that. 
*/ bcopy(dnp, &tmp_dncore, dn_core_size); adnp = &tmp_dncore; if (le_bswap) { adnp->dn_datablkszsec = BSWAP_16(adnp->dn_datablkszsec); adnp->dn_bonuslen = BSWAP_16(adnp->dn_bonuslen); adnp->dn_maxblkid = BSWAP_64(adnp->dn_maxblkid); adnp->dn_used = BSWAP_64(adnp->dn_used); } adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; adnp->dn_used = 0; cd.cd_length = dn_core_size; cd.cd_raw.iov_base = (char *)adnp; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_update(ctx, &cd, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } for (i = 0; i < dnp->dn_nblkptr; i++) { ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, &dnp->dn_blkptr[i]); if (ret != 0) goto error; } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, DN_SPILL_BLKPTR(dnp)); if (ret != 0) goto error; } return (0); error: return (ret); } /* * objset_phys_t blocks introduce a number of exceptions to the normal * authentication process. objset_phys_t's contain 2 separate HMACS for * protecting the integrity of their data. The portable_mac protects the * metadnode. This MAC can be sent with a raw send and protects against * reordering of data within the metadnode. The local_mac protects the user * accounting objects which are not sent from one system to another. * * In addition, objset blocks are the only blocks that can be modified and * written to disk without the key loaded under certain circumstances. During * zil_claim() we need to be able to update the zil_header_t to complete * claiming log blocks and during raw receives we need to write out the * portable_mac from the send file. Both of these actions are possible * because these fields are not protected by either MAC so neither one will * need to modify the MACs without the key. However, when the modified blocks * are written out they will be byteswapped into the host machine's native * endianness which will modify fields protected by the MAC. 
As a result, MAC * calculation for objset blocks works slightly differently from other block * types. Where other block types MAC the data in whatever endianness is * written to disk, objset blocks always MAC little endian version of their * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP() * and le_bswap indicates whether a byteswap is needed to get this block * into little endian format. */ int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac) { int ret; crypto_mechanism_t mech; crypto_context_t ctx; crypto_data_t cd; objset_phys_t *osp = data; uint64_t intval; boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH]; uint8_t raw_local_mac[SHA512_DIGEST_LENGTH]; /* initialize HMAC mechanism */ mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); mech.cm_param = NULL; mech.cm_param_len = 0; cd.cd_format = CRYPTO_DATA_RAW; cd.cd_offset = 0; /* calculate the portable MAC from the portable fields and metadnode */ ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } /* add in the os_type */ intval = (le_bswap) ? 
osp->os_type : BSWAP_64(osp->os_type); cd.cd_length = sizeof (uint64_t); cd.cd_raw.iov_base = (char *)&intval; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_update(ctx, &cd, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } /* add in the portable os_flags */ intval = osp->os_flags; if (should_bswap) intval = BSWAP_64(intval); intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK; if (!ZFS_HOST_BYTEORDER) intval = BSWAP_64(intval); cd.cd_length = sizeof (uint64_t); cd.cd_raw.iov_base = (char *)&intval; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_update(ctx, &cd, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } /* add in fields from the metadnode */ ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, should_bswap, &osp->os_meta_dnode); if (ret) goto error; /* store the final digest in a temporary buffer and copy what we need */ cd.cd_length = SHA512_DIGEST_LENGTH; cd.cd_raw.iov_base = (char *)raw_portable_mac; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_final(ctx, &cd, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN); + /* + * This is necessary here as we check next whether + * OBJSET_FLAG_USERACCOUNTING_COMPLETE is set in order to + * decide if the local_mac should be zeroed out. That flag will always + * be set by dmu_objset_id_quota_upgrade_cb() and + * dmu_objset_userspace_upgrade_cb() if useraccounting has been + * completed. + */ + intval = osp->os_flags; + if (should_bswap) + intval = BSWAP_64(intval); + boolean_t uacct_incomplete = + !(intval & OBJSET_FLAG_USERACCOUNTING_COMPLETE); + /* * The local MAC protects the user, group and project accounting. * If these objects are not present, the local MAC is zeroed out. 
*/ - if ((datalen >= OBJSET_PHYS_SIZE_V3 && + if (uacct_incomplete || + (datalen >= OBJSET_PHYS_SIZE_V3 && osp->os_userused_dnode.dn_type == DMU_OT_NONE && osp->os_groupused_dnode.dn_type == DMU_OT_NONE && osp->os_projectused_dnode.dn_type == DMU_OT_NONE) || (datalen >= OBJSET_PHYS_SIZE_V2 && osp->os_userused_dnode.dn_type == DMU_OT_NONE && osp->os_groupused_dnode.dn_type == DMU_OT_NONE) || (datalen <= OBJSET_PHYS_SIZE_V1)) { bzero(local_mac, ZIO_OBJSET_MAC_LEN); return (0); } /* calculate the local MAC from the userused and groupused dnodes */ ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } /* add in the non-portable os_flags */ intval = osp->os_flags; if (should_bswap) intval = BSWAP_64(intval); intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK; if (!ZFS_HOST_BYTEORDER) intval = BSWAP_64(intval); cd.cd_length = sizeof (uint64_t); cd.cd_raw.iov_base = (char *)&intval; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_update(ctx, &cd, NULL); if (ret != CRYPTO_SUCCESS) { ret = SET_ERROR(EIO); goto error; } /* add in fields from the user accounting dnodes */ if (osp->os_userused_dnode.dn_type != DMU_OT_NONE) { ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, should_bswap, &osp->os_userused_dnode); if (ret) goto error; } if (osp->os_groupused_dnode.dn_type != DMU_OT_NONE) { ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, should_bswap, &osp->os_groupused_dnode); if (ret) goto error; } if (osp->os_projectused_dnode.dn_type != DMU_OT_NONE && datalen >= OBJSET_PHYS_SIZE_V3) { ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, should_bswap, &osp->os_projectused_dnode); if (ret) goto error; } /* store the final digest in a temporary buffer and copy what we need */ cd.cd_length = SHA512_DIGEST_LENGTH; cd.cd_raw.iov_base = (char *)raw_local_mac; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_final(ctx, &cd, NULL); if (ret != CRYPTO_SUCCESS) { ret = 
SET_ERROR(EIO); goto error; } bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN); return (0); error: bzero(portable_mac, ZIO_OBJSET_MAC_LEN); bzero(local_mac, ZIO_OBJSET_MAC_LEN); return (ret); } static void zio_crypt_destroy_uio(zfs_uio_t *uio) { if (uio->uio_iov) kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t)); } /* * This function parses an uncompressed indirect block and returns a checksum * of all the portable fields from all of the contained bps. The portable * fields are the MAC and all of the fields from blk_prop except for the dedup, * checksum, and psize bits. For an explanation of the purpose of this, see * the comment block on object set authentication. */ static int zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf, uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum) { blkptr_t *bp; int i, epb = datalen >> SPA_BLKPTRSHIFT; SHA2_CTX ctx; uint8_t digestbuf[SHA512_DIGEST_LENGTH]; /* checksum all of the MACs from the layer below */ SHA2Init(SHA512, &ctx); for (i = 0, bp = buf; i < epb; i++, bp++) { zio_crypt_bp_do_indrect_checksum_updates(&ctx, version, byteswap, bp); } SHA2Final(digestbuf, &ctx); if (generate) { bcopy(digestbuf, cksum, ZIO_DATA_MAC_LEN); return (0); } if (bcmp(digestbuf, cksum, ZIO_DATA_MAC_LEN) != 0) return (SET_ERROR(ECKSUM)); return (0); } int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, uint_t datalen, boolean_t byteswap, uint8_t *cksum) { int ret; /* * Unfortunately, callers of this function will not always have * easy access to the on-disk format version. This info is * normally found in the DSL Crypto Key, but the checksum-of-MACs * is expected to be verifiable even when the key isn't loaded. * Here, instead of doing a ZAP lookup for the version for each * zio, we simply try both existing formats. 
*/ ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum); if (ret == ECKSUM) { ASSERT(!generate); ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, datalen, 0, byteswap, cksum); } return (ret); } int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, uint_t datalen, boolean_t byteswap, uint8_t *cksum) { int ret; void *buf; buf = abd_borrow_buf_copy(abd, datalen); ret = zio_crypt_do_indirect_mac_checksum(generate, buf, datalen, byteswap, cksum); abd_return_buf(abd, buf, datalen); return (ret); } /* * Special case handling routine for encrypting / decrypting ZIL blocks. * We do not check for the older ZIL chain because the encryption feature * was not available before the newer ZIL chain was introduced. The goal * here is to encrypt everything except the blkptr_t of a lr_write_t and * the zil_chain_t header. Everything that is not encrypted is authenticated. */ static int zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; uint64_t txtype, lr_len; uint_t nr_src, nr_dst, crypt_len; uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp; zil_chain_t *zilc; lr_t *lr; uint8_t *aadbuf = zio_buf_alloc(datalen); /* cipherbuf always needs an extra iovec for the MAC */ if (encrypt) { src = plainbuf; dst = cipherbuf; nr_src = 0; nr_dst = 1; } else { src = cipherbuf; dst = plainbuf; nr_src = 1; nr_dst = 0; } bzero(dst, datalen); /* find the start and end record of the log block */ zilc = (zil_chain_t *)src; slrp = src + sizeof (zil_chain_t); aadp = aadbuf; blkend = src + ((byteswap) ? 
BSWAP_64(zilc->zc_nused) : zilc->zc_nused); /* calculate the number of encrypted iovecs we will need */ for (; slrp < blkend; slrp += lr_len) { lr = (lr_t *)slrp; if (!byteswap) { txtype = lr->lrc_txtype; lr_len = lr->lrc_reclen; } else { txtype = BSWAP_64(lr->lrc_txtype); lr_len = BSWAP_64(lr->lrc_reclen); } nr_iovecs++; if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t)) nr_iovecs++; } nr_src += nr_iovecs; nr_dst += nr_iovecs; /* allocate the iovec arrays */ if (nr_src != 0) { src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); if (src_iovecs == NULL) { ret = SET_ERROR(ENOMEM); goto error; } } if (nr_dst != 0) { dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); if (dst_iovecs == NULL) { ret = SET_ERROR(ENOMEM); goto error; } } /* * Copy the plain zil header over and authenticate everything except * the checksum that will store our MAC. If we are writing the data * the embedded checksum will not have been calculated yet, so we don't * authenticate that. */ bcopy(src, dst, sizeof (zil_chain_t)); bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t)); aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t); aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t); /* loop over records again, filling in iovecs */ nr_iovecs = 0; slrp = src + sizeof (zil_chain_t); dlrp = dst + sizeof (zil_chain_t); for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) { lr = (lr_t *)slrp; if (!byteswap) { txtype = lr->lrc_txtype; lr_len = lr->lrc_reclen; } else { txtype = BSWAP_64(lr->lrc_txtype); lr_len = BSWAP_64(lr->lrc_reclen); } /* copy the common lr_t */ bcopy(slrp, dlrp, sizeof (lr_t)); bcopy(slrp, aadp, sizeof (lr_t)); aadp += sizeof (lr_t); aad_len += sizeof (lr_t); ASSERT3P(src_iovecs, !=, NULL); ASSERT3P(dst_iovecs, !=, NULL); /* * If this is a TX_WRITE record we want to encrypt everything * except the bp if exists. If the bp does exist we want to * authenticate it. 
*/ if (txtype == TX_WRITE) { crypt_len = sizeof (lr_write_t) - sizeof (lr_t) - sizeof (blkptr_t); src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t); src_iovecs[nr_iovecs].iov_len = crypt_len; dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t); dst_iovecs[nr_iovecs].iov_len = crypt_len; /* copy the bp now since it will not be encrypted */ bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), dlrp + sizeof (lr_write_t) - sizeof (blkptr_t), sizeof (blkptr_t)); bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), aadp, sizeof (blkptr_t)); aadp += sizeof (blkptr_t); aad_len += sizeof (blkptr_t); nr_iovecs++; total_len += crypt_len; if (lr_len != sizeof (lr_write_t)) { crypt_len = lr_len - sizeof (lr_write_t); src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_write_t); src_iovecs[nr_iovecs].iov_len = crypt_len; dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_write_t); dst_iovecs[nr_iovecs].iov_len = crypt_len; nr_iovecs++; total_len += crypt_len; } } else { crypt_len = lr_len - sizeof (lr_t); src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t); src_iovecs[nr_iovecs].iov_len = crypt_len; dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t); dst_iovecs[nr_iovecs].iov_len = crypt_len; nr_iovecs++; total_len += crypt_len; } } *no_crypt = (nr_iovecs == 0); *enc_len = total_len; *authbuf = aadbuf; *auth_len = aad_len; if (encrypt) { puio->uio_iov = src_iovecs; puio->uio_iovcnt = nr_src; cuio->uio_iov = dst_iovecs; cuio->uio_iovcnt = nr_dst; } else { puio->uio_iov = dst_iovecs; puio->uio_iovcnt = nr_dst; cuio->uio_iov = src_iovecs; cuio->uio_iovcnt = nr_src; } return (0); error: zio_buf_free(aadbuf, datalen); if (src_iovecs != NULL) kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); if (dst_iovecs != NULL) kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); *enc_len = 0; *authbuf = NULL; *auth_len = 0; *no_crypt = B_FALSE; puio->uio_iov = NULL; puio->uio_iovcnt = 0; cuio->uio_iov = NULL; cuio->uio_iovcnt = 0; return (ret); } /* * Special case handling routine for 
encrypting / decrypting dnode blocks. */ static int zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version, uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; uint_t nr_src, nr_dst, crypt_len; uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; uint_t i, j, max_dnp = datalen >> DNODE_SHIFT; iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; uint8_t *src, *dst, *aadp; dnode_phys_t *dnp, *adnp, *sdnp, *ddnp; uint8_t *aadbuf = zio_buf_alloc(datalen); if (encrypt) { src = plainbuf; dst = cipherbuf; nr_src = 0; nr_dst = 1; } else { src = cipherbuf; dst = plainbuf; nr_src = 1; nr_dst = 0; } sdnp = (dnode_phys_t *)src; ddnp = (dnode_phys_t *)dst; aadp = aadbuf; /* * Count the number of iovecs we will need to do the encryption by * counting the number of bonus buffers that need to be encrypted. */ for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { /* * This block may still be byteswapped. However, all of the * values we use are either uint8_t's (for which byteswapping * is a noop) or a * != 0 check, which will work regardless * of whether or not we byteswap. */ if (sdnp[i].dn_type != DMU_OT_NONE && DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) && sdnp[i].dn_bonuslen != 0) { nr_iovecs++; } } nr_src += nr_iovecs; nr_dst += nr_iovecs; if (nr_src != 0) { src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); if (src_iovecs == NULL) { ret = SET_ERROR(ENOMEM); goto error; } } if (nr_dst != 0) { dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); if (dst_iovecs == NULL) { ret = SET_ERROR(ENOMEM); goto error; } } nr_iovecs = 0; /* * Iterate through the dnodes again, this time filling in the uios * we allocated earlier. We also concatenate any data we want to * authenticate onto aadbuf. 
*/ for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { dnp = &sdnp[i]; /* copy over the core fields and blkptrs (kept as plaintext) */ bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp); if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]), sizeof (blkptr_t)); } /* * Handle authenticated data. We authenticate everything in * the dnode that can be brought over when we do a raw send. * This includes all of the core fields as well as the MACs * stored in the bp checksums and all of the portable bits * from blk_prop. We include the dnode padding here in case it * ever gets used in the future. Some dn_flags and dn_used are * not portable so we mask those out values out of the * authenticated data. */ crypt_len = offsetof(dnode_phys_t, dn_blkptr); bcopy(dnp, aadp, crypt_len); adnp = (dnode_phys_t *)aadp; adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; adnp->dn_used = 0; aadp += crypt_len; aad_len += crypt_len; for (j = 0; j < dnp->dn_nblkptr; j++) { zio_crypt_bp_do_aad_updates(&aadp, &aad_len, version, byteswap, &dnp->dn_blkptr[j]); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { zio_crypt_bp_do_aad_updates(&aadp, &aad_len, version, byteswap, DN_SPILL_BLKPTR(dnp)); } /* * If this bonus buffer needs to be encrypted, we prepare an * iovec_t. The encryption / decryption functions will fill * this in for us with the encrypted or decrypted data. * Otherwise we add the bonus buffer to the authenticated * data buffer and copy it over to the destination. The * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that * we can guarantee alignment with the AES block size * (128 bits). 
*/ crypt_len = DN_MAX_BONUS_LEN(dnp); if (dnp->dn_type != DMU_OT_NONE && DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && dnp->dn_bonuslen != 0) { ASSERT3U(nr_iovecs, <, nr_src); ASSERT3U(nr_iovecs, <, nr_dst); ASSERT3P(src_iovecs, !=, NULL); ASSERT3P(dst_iovecs, !=, NULL); src_iovecs[nr_iovecs].iov_base = DN_BONUS(dnp); src_iovecs[nr_iovecs].iov_len = crypt_len; dst_iovecs[nr_iovecs].iov_base = DN_BONUS(&ddnp[i]); dst_iovecs[nr_iovecs].iov_len = crypt_len; nr_iovecs++; total_len += crypt_len; } else { bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len); bcopy(DN_BONUS(dnp), aadp, crypt_len); aadp += crypt_len; aad_len += crypt_len; } } *no_crypt = (nr_iovecs == 0); *enc_len = total_len; *authbuf = aadbuf; *auth_len = aad_len; if (encrypt) { puio->uio_iov = src_iovecs; puio->uio_iovcnt = nr_src; cuio->uio_iov = dst_iovecs; cuio->uio_iovcnt = nr_dst; } else { puio->uio_iov = dst_iovecs; puio->uio_iovcnt = nr_dst; cuio->uio_iov = src_iovecs; cuio->uio_iovcnt = nr_src; } return (0); error: zio_buf_free(aadbuf, datalen); if (src_iovecs != NULL) kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); if (dst_iovecs != NULL) kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); *enc_len = 0; *authbuf = NULL; *auth_len = 0; *no_crypt = B_FALSE; puio->uio_iov = NULL; puio->uio_iovcnt = 0; cuio->uio_iov = NULL; cuio->uio_iovcnt = 0; return (ret); } static int zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len) { (void) encrypt; int ret; uint_t nr_plain = 1, nr_cipher = 2; iovec_t *plain_iovecs = NULL, *cipher_iovecs = NULL; /* allocate the iovecs for the plain and cipher data */ plain_iovecs = kmem_alloc(nr_plain * sizeof (iovec_t), KM_SLEEP); if (!plain_iovecs) { ret = SET_ERROR(ENOMEM); goto error; } cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t), KM_SLEEP); if (!cipher_iovecs) { ret = SET_ERROR(ENOMEM); goto error; } plain_iovecs[0].iov_base = plainbuf; plain_iovecs[0].iov_len = 
datalen; cipher_iovecs[0].iov_base = cipherbuf; cipher_iovecs[0].iov_len = datalen; *enc_len = datalen; puio->uio_iov = plain_iovecs; puio->uio_iovcnt = nr_plain; cuio->uio_iov = cipher_iovecs; cuio->uio_iovcnt = nr_cipher; return (0); error: if (plain_iovecs != NULL) kmem_free(plain_iovecs, nr_plain * sizeof (iovec_t)); if (cipher_iovecs != NULL) kmem_free(cipher_iovecs, nr_cipher * sizeof (iovec_t)); *enc_len = 0; puio->uio_iov = NULL; puio->uio_iovcnt = 0; cuio->uio_iov = NULL; cuio->uio_iovcnt = 0; return (ret); } /* * This function builds up the plaintext (puio) and ciphertext (cuio) uios so * that they can be used for encryption and decryption by zio_do_crypt_uio(). * Most blocks will use zio_crypt_init_uios_normal(), with ZIL and dnode blocks * requiring special handling to parse out pieces that are to be encrypted. The * authbuf is used by these special cases to store additional authenticated * data (AAD) for the encryption modes. */ static int zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uint8_t *mac, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; iovec_t *mac_iov; ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE); /* route to handler */ switch (ot) { case DMU_OT_INTENT_LOG: ret = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, no_crypt); break; case DMU_OT_DNODE: ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf, cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, no_crypt); break; default: ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, datalen, puio, cuio, enc_len); *authbuf = NULL; *auth_len = 0; *no_crypt = B_FALSE; break; } if (ret != 0) goto error; /* populate the uios */ puio->uio_segflg = UIO_SYSSPACE; cuio->uio_segflg = UIO_SYSSPACE; mac_iov = ((iovec_t 
*)&cuio->uio_iov[cuio->uio_iovcnt - 1]); mac_iov->iov_base = mac; mac_iov->iov_len = ZIO_DATA_MAC_LEN; return (0); error: return (ret); } /* * Primary encryption / decryption entrypoint for zio data. */ int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf, boolean_t *no_crypt) { int ret; boolean_t locked = B_FALSE; uint64_t crypt = key->zk_crypt; uint_t keydata_len = zio_crypt_table[crypt].ci_keylen; uint_t enc_len, auth_len; zfs_uio_t puio, cuio; uint8_t enc_keydata[MASTER_KEY_MAX_LEN]; crypto_key_t tmp_ckey, *ckey = NULL; crypto_ctx_template_t tmpl; uint8_t *authbuf = NULL; /* * If the needed key is the current one, just use it. Otherwise we * need to generate a temporary one from the given salt + master key. * If we are encrypting, we must return a copy of the current salt * so that it can be stored in the blkptr_t. */ rw_enter(&key->zk_salt_lock, RW_READER); locked = B_TRUE; if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) { ckey = &key->zk_current_key; tmpl = key->zk_current_tmpl; } else { rw_exit(&key->zk_salt_lock); locked = B_FALSE; ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len); if (ret != 0) goto error; tmp_ckey.ck_format = CRYPTO_KEY_RAW; tmp_ckey.ck_data = enc_keydata; tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len); ckey = &tmp_ckey; tmpl = NULL; } /* * Attempt to use QAT acceleration if we can. We currently don't * do this for metadnode and ZIL blocks, since they have a much * more involved buffer layout and the qat_crypt() function only * works in-place. */ if (qat_crypt_use_accel(datalen) && ot != DMU_OT_INTENT_LOG && ot != DMU_OT_DNODE) { uint8_t *srcbuf, *dstbuf; if (encrypt) { srcbuf = plainbuf; dstbuf = cipherbuf; } else { srcbuf = cipherbuf; dstbuf = plainbuf; } ret = qat_crypt((encrypt) ? 
QAT_ENCRYPT : QAT_DECRYPT, srcbuf, dstbuf, NULL, 0, iv, mac, ckey, key->zk_crypt, datalen); if (ret == CPA_STATUS_SUCCESS) { if (locked) { rw_exit(&key->zk_salt_lock); locked = B_FALSE; } return (0); } /* If the hardware implementation fails fall back to software */ } bzero(&puio, sizeof (zfs_uio_t)); bzero(&cuio, sizeof (zfs_uio_t)); /* create uios for encryption */ ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, &authbuf, &auth_len, no_crypt); if (ret != 0) goto error; /* perform the encryption / decryption in software */ ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len, &puio, &cuio, authbuf, auth_len); if (ret != 0) goto error; if (locked) { rw_exit(&key->zk_salt_lock); locked = B_FALSE; } if (authbuf != NULL) zio_buf_free(authbuf, datalen); if (ckey == &tmp_ckey) bzero(enc_keydata, keydata_len); zio_crypt_destroy_uio(&puio); zio_crypt_destroy_uio(&cuio); return (0); error: if (locked) rw_exit(&key->zk_salt_lock); if (authbuf != NULL) zio_buf_free(authbuf, datalen); if (ckey == &tmp_ckey) bzero(enc_keydata, keydata_len); zio_crypt_destroy_uio(&puio); zio_crypt_destroy_uio(&cuio); return (ret); } /* * Simple wrapper around zio_do_crypt_data() to work with abd's instead of * linear buffers. 
*/ int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) { int ret; void *ptmp, *ctmp; if (encrypt) { ptmp = abd_borrow_buf_copy(pabd, datalen); ctmp = abd_borrow_buf(cabd, datalen); } else { ptmp = abd_borrow_buf(pabd, datalen); ctmp = abd_borrow_buf_copy(cabd, datalen); } ret = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac, datalen, ptmp, ctmp, no_crypt); if (ret != 0) goto error; if (encrypt) { abd_return_buf(pabd, ptmp, datalen); abd_return_buf_copy(cabd, ctmp, datalen); } else { abd_return_buf_copy(pabd, ptmp, datalen); abd_return_buf(cabd, ctmp, datalen); } return (0); error: if (encrypt) { abd_return_buf(pabd, ptmp, datalen); abd_return_buf_copy(cabd, ctmp, datalen); } else { abd_return_buf_copy(pabd, ptmp, datalen); abd_return_buf(cabd, ctmp, datalen); } return (ret); } #if defined(_KERNEL) /* BEGIN CSTYLED */ module_param(zfs_key_max_salt_uses, ulong, 0644); MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value " "can be used for generating encryption keys before it is rotated"); /* END CSTYLED */ #endif diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index dd37e3af7ed5..f574130e5049 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -1,859 +1,864 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2020 Oxide Computer Company */ #include #include #include #include #include #include #include #include #include #include #include static void dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) { dmu_buf_impl_t *db; int txgoff = tx->tx_txg & TXG_MASK; int nblkptr = dn->dn_phys->dn_nblkptr; int old_toplvl = dn->dn_phys->dn_nlevels - 1; int new_level = dn->dn_next_nlevels[txgoff]; int i; rw_enter(&dn->dn_struct_rwlock, RW_WRITER); /* this dnode can't be paged out because it's dirty */ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0); db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG); ASSERT(db != NULL); dn->dn_phys->dn_nlevels = new_level; dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset, (u_longlong_t)dn->dn_object, dn->dn_phys->dn_nlevels); /* * Lock ordering requires that we hold the children's db_mutexes (by * calling dbuf_find()) before holding the parent's db_rwlock. The lock * order is imposed by dbuf_read's steps of "grab the lock to protect * db_parent, get db_parent, hold db_parent's db_rwlock". 
*/ dmu_buf_impl_t *children[DN_MAX_NBLKPTR]; ASSERT3U(nblkptr, <=, DN_MAX_NBLKPTR); for (i = 0; i < nblkptr; i++) { children[i] = dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i); } /* transfer dnode's block pointers to new indirect block */ (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT); if (dn->dn_dbuf != NULL) rw_enter(&dn->dn_dbuf->db_rwlock, RW_WRITER); rw_enter(&db->db_rwlock, RW_WRITER); ASSERT(db->db.db_data); ASSERT(arc_released(db->db_buf)); ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size); bcopy(dn->dn_phys->dn_blkptr, db->db.db_data, sizeof (blkptr_t) * nblkptr); arc_buf_freeze(db->db_buf); /* set dbuf's parent pointers to new indirect buf */ for (i = 0; i < nblkptr; i++) { dmu_buf_impl_t *child = children[i]; if (child == NULL) continue; #ifdef ZFS_DEBUG DB_DNODE_ENTER(child); ASSERT3P(DB_DNODE(child), ==, dn); DB_DNODE_EXIT(child); #endif /* DEBUG */ if (child->db_parent && child->db_parent != dn->dn_dbuf) { ASSERT(child->db_parent->db_level == db->db_level); ASSERT(child->db_blkptr != &dn->dn_phys->dn_blkptr[child->db_blkid]); mutex_exit(&child->db_mtx); continue; } ASSERT(child->db_parent == NULL || child->db_parent == dn->dn_dbuf); child->db_parent = db; dbuf_add_ref(db, child); if (db->db.db_data) child->db_blkptr = (blkptr_t *)db->db.db_data + i; else child->db_blkptr = NULL; dprintf_dbuf_bp(child, child->db_blkptr, "changed db_blkptr to new indirect %s", ""); mutex_exit(&child->db_mtx); } bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr); rw_exit(&db->db_rwlock); if (dn->dn_dbuf != NULL) rw_exit(&dn->dn_dbuf->db_rwlock); dbuf_rele(db, FTAG); rw_exit(&dn->dn_struct_rwlock); } static void free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) { dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; uint64_t bytesfreed = 0; dprintf("ds=%p obj=%llx num=%d\n", ds, (u_longlong_t)dn->dn_object, num); for (int i = 0; i < num; i++, bp++) { if (BP_IS_HOLE(bp)) continue; bytesfreed += dsl_dataset_block_kill(ds, 
bp, tx, B_FALSE); ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys)); /* * Save some useful information on the holes being * punched, including logical size, type, and indirection * level. Retaining birth time enables detection of when * holes are punched for reducing the number of free * records transmitted during a zfs send. */ uint64_t lsize = BP_GET_LSIZE(bp); dmu_object_type_t type = BP_GET_TYPE(bp); uint64_t lvl = BP_GET_LEVEL(bp); bzero(bp, sizeof (blkptr_t)); if (spa_feature_is_active(dn->dn_objset->os_spa, SPA_FEATURE_HOLE_BIRTH)) { BP_SET_LSIZE(bp, lsize); BP_SET_TYPE(bp, type); BP_SET_LEVEL(bp, lvl); BP_SET_BIRTH(bp, dmu_tx_get_txg(tx), 0); } } dnode_diduse_space(dn, -bytesfreed); } #ifdef ZFS_DEBUG static void free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) { int off, num; int i, err, epbs; uint64_t txg = tx->tx_txg; dnode_t *dn; DB_DNODE_ENTER(db); dn = DB_DNODE(db); epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; off = start - (db->db_blkid * 1<=, 0); ASSERT3U(num, >=, 0); ASSERT3U(db->db_level, >, 0); ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift); ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT); ASSERT(db->db_blkptr != NULL); for (i = off; i < off+num; i++) { uint64_t *buf; dmu_buf_impl_t *child; dbuf_dirty_record_t *dr; int j; ASSERT(db->db_level == 1); rw_enter(&dn->dn_struct_rwlock, RW_READER); err = dbuf_hold_impl(dn, db->db_level - 1, (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child); rw_exit(&dn->dn_struct_rwlock); if (err == ENOENT) continue; ASSERT(err == 0); ASSERT(child->db_level == 0); dr = dbuf_find_dirty_eq(child, txg); /* data_old better be zeroed */ if (dr) { buf = dr->dt.dl.dr_data->b_data; for (j = 0; j < child->db.db_size >> 3; j++) { if (buf[j] != 0) { panic("freed data not zero: " "child=%p i=%d off=%d num=%d\n", (void *)child, i, off, num); } } } /* * db_data better be zeroed unless it's dirty in a * future txg. 
*/ mutex_enter(&child->db_mtx); buf = child->db.db_data; if (buf != NULL && child->db_state != DB_FILL && list_is_empty(&child->db_dirty_records)) { for (j = 0; j < child->db.db_size >> 3; j++) { if (buf[j] != 0) { panic("freed data not zero: " "child=%p i=%d off=%d num=%d\n", (void *)child, i, off, num); } } } mutex_exit(&child->db_mtx); dbuf_rele(child, FTAG); } DB_DNODE_EXIT(db); } #endif /* * We don't usually free the indirect blocks here. If in one txg we have a * free_range and a write to the same indirect block, it's important that we * preserve the hole's birth times. Therefore, we don't free any any indirect * blocks in free_children(). If an indirect block happens to turn into all * holes, it will be freed by dbuf_write_children_ready, which happens at a * point in the syncing process where we know for certain the contents of the * indirect block. * * However, if we're freeing a dnode, its space accounting must go to zero * before we actually try to free the dnode, or we will trip an assertion. In * addition, we know the case described above cannot occur, because the dnode is * being freed. Therefore, we free the indirect blocks immediately in that * case. */ static void free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, boolean_t free_indirects, dmu_tx_t *tx) { dnode_t *dn; blkptr_t *bp; dmu_buf_impl_t *subdb; uint64_t start, end, dbstart, dbend; unsigned int epbs, shift, i; /* * There is a small possibility that this block will not be cached: * 1 - if level > 1 and there are no children with level <= 1 * 2 - if this block was evicted since we read it from * dmu_tx_hold_free(). */ if (db->db_state != DB_CACHED) (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); /* * If we modify this indirect block, and we are not freeing the * dnode (!free_indirects), then this indirect block needs to get * written to disk by dbuf_write(). 
If it is dirty, we know it will * be written (otherwise, we would have incorrect on-disk state * because the space would be freed but still referenced by the BP * in this indirect block). Therefore we VERIFY that it is * dirty. * * Our VERIFY covers some cases that do not actually have to be * dirty, but the open-context code happens to dirty. E.g. if the * blocks we are freeing are all holes, because in that case, we * are only freeing part of this indirect block, so it is an * ancestor of the first or last block to be freed. The first and * last L1 indirect blocks are always dirtied by dnode_free_range(). */ db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); VERIFY(BP_GET_FILL(db->db_blkptr) == 0 || db->db_dirtycnt > 0); dmu_buf_unlock_parent(db, dblt, FTAG); dbuf_release_bp(db); bp = db->db.db_data; DB_DNODE_ENTER(db); dn = DB_DNODE(db); epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; ASSERT3U(epbs, <, 31); shift = (db->db_level - 1) * epbs; dbstart = db->db_blkid << epbs; start = blkid >> shift; if (dbstart < start) { bp += start - dbstart; } else { start = dbstart; } dbend = ((db->db_blkid + 1) << epbs) - 1; end = (blkid + nblks - 1) >> shift; if (dbend <= end) end = dbend; ASSERT3U(start, <=, end); if (db->db_level == 1) { FREE_VERIFY(db, start, end, tx); rw_enter(&db->db_rwlock, RW_WRITER); free_blocks(dn, bp, end - start + 1, tx); rw_exit(&db->db_rwlock); } else { for (uint64_t id = start; id <= end; id++, bp++) { if (BP_IS_HOLE(bp)) continue; rw_enter(&dn->dn_struct_rwlock, RW_READER); VERIFY0(dbuf_hold_impl(dn, db->db_level - 1, id, TRUE, FALSE, FTAG, &subdb)); rw_exit(&dn->dn_struct_rwlock); ASSERT3P(bp, ==, subdb->db_blkptr); free_children(subdb, blkid, nblks, free_indirects, tx); dbuf_rele(subdb, FTAG); } } if (free_indirects) { rw_enter(&db->db_rwlock, RW_WRITER); for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) ASSERT(BP_IS_HOLE(bp)); bzero(db->db.db_data, db->db.db_size); free_blocks(dn, db->db_blkptr, 1, tx); 
rw_exit(&db->db_rwlock); } DB_DNODE_EXIT(db); arc_buf_freeze(db->db_buf); } /* * Traverse the indicated range of the provided file * and "free" all the blocks contained there. */ static void dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, boolean_t free_indirects, dmu_tx_t *tx) { blkptr_t *bp = dn->dn_phys->dn_blkptr; int dnlevel = dn->dn_phys->dn_nlevels; boolean_t trunc = B_FALSE; if (blkid > dn->dn_phys->dn_maxblkid) return; ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX); if (blkid + nblks > dn->dn_phys->dn_maxblkid) { nblks = dn->dn_phys->dn_maxblkid - blkid + 1; trunc = B_TRUE; } /* There are no indirect blocks in the object */ if (dnlevel == 1) { if (blkid >= dn->dn_phys->dn_nblkptr) { /* this range was never made persistent */ return; } ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr); free_blocks(dn, bp + blkid, nblks, tx); } else { int shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT); int start = blkid >> shift; int end = (blkid + nblks - 1) >> shift; dmu_buf_impl_t *db; ASSERT(start < dn->dn_phys->dn_nblkptr); bp += start; for (int i = start; i <= end; i++, bp++) { if (BP_IS_HOLE(bp)) continue; rw_enter(&dn->dn_struct_rwlock, RW_READER); VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i, TRUE, FALSE, FTAG, &db)); rw_exit(&dn->dn_struct_rwlock); free_children(db, blkid, nblks, free_indirects, tx); dbuf_rele(db, FTAG); } } /* * Do not truncate the maxblkid if we are performing a raw * receive. The raw receive sets the maxblkid manually and * must not be overridden. Usually, the last DRR_FREE record * will be at the maxblkid, because the source system sets * the maxblkid when truncating. However, if the last block * was freed by overwriting with zeros and being compressed * away to a hole, the source system will generate a DRR_FREE * record while leaving the maxblkid after the end of that * record. 
In this case we need to leave the maxblkid as * indicated in the DRR_OBJECT record, so that it matches the * source system, ensuring that the cryptographic hashes will * match. */ if (trunc && !dn->dn_objset->os_raw_receive) { uint64_t off __maybe_unused; dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1; off = (dn->dn_phys->dn_maxblkid + 1) * (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); ASSERT(off < dn->dn_phys->dn_maxblkid || dn->dn_phys->dn_maxblkid == 0 || dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); } } typedef struct dnode_sync_free_range_arg { dnode_t *dsfra_dnode; dmu_tx_t *dsfra_tx; boolean_t dsfra_free_indirects; } dnode_sync_free_range_arg_t; static void dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks) { dnode_sync_free_range_arg_t *dsfra = arg; dnode_t *dn = dsfra->dsfra_dnode; mutex_exit(&dn->dn_mtx); dnode_sync_free_range_impl(dn, blkid, nblks, dsfra->dsfra_free_indirects, dsfra->dsfra_tx); mutex_enter(&dn->dn_mtx); } /* * Try to kick all the dnode's dbufs out of the cache... */ void dnode_evict_dbufs(dnode_t *dn) { dmu_buf_impl_t *db_marker; dmu_buf_impl_t *db, *db_next; db_marker = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP); mutex_enter(&dn->dn_dbufs_mtx); for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) { #ifdef ZFS_DEBUG DB_DNODE_ENTER(db); ASSERT3P(DB_DNODE(db), ==, dn); DB_DNODE_EXIT(db); #endif /* DEBUG */ mutex_enter(&db->db_mtx); if (db->db_state != DB_EVICTING && zfs_refcount_is_zero(&db->db_holds)) { db_marker->db_level = db->db_level; db_marker->db_blkid = db->db_blkid; db_marker->db_state = DB_SEARCH; avl_insert_here(&dn->dn_dbufs, db_marker, db, AVL_BEFORE); /* * We need to use the "marker" dbuf rather than * simply getting the next dbuf, because * dbuf_destroy() may actually remove multiple dbufs. * It can call itself recursively on the parent dbuf, * which may also be removed from dn_dbufs. 
The code * flow would look like: * * dbuf_destroy(): * dnode_rele_and_unlock(parent_dbuf, evicting=TRUE): * if (!cacheable || pending_evict) * dbuf_destroy() */ dbuf_destroy(db); db_next = AVL_NEXT(&dn->dn_dbufs, db_marker); avl_remove(&dn->dn_dbufs, db_marker); } else { db->db_pending_evict = TRUE; mutex_exit(&db->db_mtx); db_next = AVL_NEXT(&dn->dn_dbufs, db); } } mutex_exit(&dn->dn_dbufs_mtx); kmem_free(db_marker, sizeof (dmu_buf_impl_t)); dnode_evict_bonus(dn); } void dnode_evict_bonus(dnode_t *dn) { rw_enter(&dn->dn_struct_rwlock, RW_WRITER); if (dn->dn_bonus != NULL) { if (zfs_refcount_is_zero(&dn->dn_bonus->db_holds)) { mutex_enter(&dn->dn_bonus->db_mtx); dbuf_destroy(dn->dn_bonus); dn->dn_bonus = NULL; } else { dn->dn_bonus->db_pending_evict = TRUE; } } rw_exit(&dn->dn_struct_rwlock); } static void dnode_undirty_dbufs(list_t *list) { dbuf_dirty_record_t *dr; while ((dr = list_head(list))) { dmu_buf_impl_t *db = dr->dr_dbuf; uint64_t txg = dr->dr_txg; if (db->db_level != 0) dnode_undirty_dbufs(&dr->dt.di.dr_children); mutex_enter(&db->db_mtx); /* XXX - use dbuf_undirty()? */ list_remove(list, dr); ASSERT(list_head(&db->db_dirty_records) == dr); list_remove_head(&db->db_dirty_records); ASSERT(list_is_empty(&db->db_dirty_records)); db->db_dirtycnt -= 1; if (db->db_level == 0) { ASSERT(db->db_blkid == DMU_BONUS_BLKID || dr->dt.dl.dr_data == db->db_buf); dbuf_unoverride(dr); } else { mutex_destroy(&dr->dt.di.dr_mtx); list_destroy(&dr->dt.di.dr_children); } kmem_free(dr, sizeof (dbuf_dirty_record_t)); dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE); } } static void dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) { int txgoff = tx->tx_txg & TXG_MASK; ASSERT(dmu_tx_is_syncing(tx)); /* * Our contents should have been freed in dnode_sync() by the * free range record inserted by the caller of dnode_free(). 
*/ ASSERT0(DN_USED_BYTES(dn->dn_phys)); ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr)); dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); dnode_evict_dbufs(dn); /* * XXX - It would be nice to assert this, but we may still * have residual holds from async evictions from the arc... * * zfs_obj_to_path() also depends on this being * commented out. * * ASSERT3U(zfs_refcount_count(&dn->dn_holds), ==, 1); */ /* Undirty next bits */ dn->dn_next_nlevels[txgoff] = 0; dn->dn_next_indblkshift[txgoff] = 0; dn->dn_next_blksz[txgoff] = 0; dn->dn_next_maxblkid[txgoff] = 0; /* ASSERT(blkptrs are zero); */ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); ASSERT(dn->dn_type != DMU_OT_NONE); ASSERT(dn->dn_free_txg > 0); if (dn->dn_allocated_txg != dn->dn_free_txg) dmu_buf_will_dirty(&dn->dn_dbuf->db, tx); bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots); dnode_free_interior_slots(dn); mutex_enter(&dn->dn_mtx); dn->dn_type = DMU_OT_NONE; dn->dn_maxblkid = 0; dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_have_spill = B_FALSE; dn->dn_num_slots = 1; mutex_exit(&dn->dn_mtx); ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); /* * Now that we've released our hold, the dnode may * be evicted, so we mustn't access it. */ } /* * Write out the dnode's dirty buffers. */ void dnode_sync(dnode_t *dn, dmu_tx_t *tx) { objset_t *os = dn->dn_objset; dnode_phys_t *dnp = dn->dn_phys; int txgoff = tx->tx_txg & TXG_MASK; list_t *list = &dn->dn_dirty_records[txgoff]; static const dnode_phys_t zerodn __maybe_unused = { 0 }; boolean_t kill_spill = B_FALSE; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg); ASSERT(dnp->dn_type != DMU_OT_NONE || bcmp(dnp, &zerodn, DNODE_MIN_SIZE) == 0); DNODE_VERIFY(dn); ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); /* * Do user accounting if it is enabled and this is not * an encrypted receive. 
*/ if (dmu_objset_userused_enabled(os) && !DMU_OBJECT_IS_SPECIAL(dn->dn_object) && (!os->os_encrypted || !dmu_objset_is_receiving(os))) { mutex_enter(&dn->dn_mtx); dn->dn_oldused = DN_USED_BYTES(dn->dn_phys); dn->dn_oldflags = dn->dn_phys->dn_flags; dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED; if (dmu_objset_userobjused_enabled(dn->dn_objset)) dn->dn_phys->dn_flags |= DNODE_FLAG_USEROBJUSED_ACCOUNTED; mutex_exit(&dn->dn_mtx); dmu_objset_userquota_get_ids(dn, B_FALSE, tx); - } else { - /* Once we account for it, we should always account for it */ + } else if (!(os->os_encrypted && dmu_objset_is_receiving(os))) { + /* + * Once we account for it, we should always account for it, + * except for the case of a raw receive. We will not be able + * to account for it until the receiving dataset has been + * mounted. + */ ASSERT(!(dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED)); ASSERT(!(dn->dn_phys->dn_flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED)); } mutex_enter(&dn->dn_mtx); if (dn->dn_allocated_txg == tx->tx_txg) { /* The dnode is newly allocated or reallocated */ if (dnp->dn_type == DMU_OT_NONE) { /* this is a first alloc, not a realloc */ dnp->dn_nlevels = 1; dnp->dn_nblkptr = dn->dn_nblkptr; } dnp->dn_type = dn->dn_type; dnp->dn_bonustype = dn->dn_bonustype; dnp->dn_bonuslen = dn->dn_bonuslen; } dnp->dn_extra_slots = dn->dn_num_slots - 1; ASSERT(dnp->dn_nlevels > 1 || BP_IS_HOLE(&dnp->dn_blkptr[0]) || BP_IS_EMBEDDED(&dnp->dn_blkptr[0]) || BP_GET_LSIZE(&dnp->dn_blkptr[0]) == dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); ASSERT(dnp->dn_nlevels < 2 || BP_IS_HOLE(&dnp->dn_blkptr[0]) || BP_GET_LSIZE(&dnp->dn_blkptr[0]) == 1 << dnp->dn_indblkshift); if (dn->dn_next_type[txgoff] != 0) { dnp->dn_type = dn->dn_type; dn->dn_next_type[txgoff] = 0; } if (dn->dn_next_blksz[txgoff] != 0) { ASSERT(P2PHASE(dn->dn_next_blksz[txgoff], SPA_MINBLOCKSIZE) == 0); ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) || dn->dn_maxblkid == 0 || list_head(list) != NULL || 
dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT == dnp->dn_datablkszsec || !range_tree_is_empty(dn->dn_free_ranges[txgoff])); dnp->dn_datablkszsec = dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT; dn->dn_next_blksz[txgoff] = 0; } if (dn->dn_next_bonuslen[txgoff] != 0) { if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN) dnp->dn_bonuslen = 0; else dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff]; ASSERT(dnp->dn_bonuslen <= DN_SLOTS_TO_BONUSLEN(dnp->dn_extra_slots + 1)); dn->dn_next_bonuslen[txgoff] = 0; } if (dn->dn_next_bonustype[txgoff] != 0) { ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff])); dnp->dn_bonustype = dn->dn_next_bonustype[txgoff]; dn->dn_next_bonustype[txgoff] = 0; } boolean_t freeing_dnode = dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg; /* * Remove the spill block if we have been explicitly asked to * remove it, or if the object is being removed. */ if (dn->dn_rm_spillblk[txgoff] || freeing_dnode) { if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) kill_spill = B_TRUE; dn->dn_rm_spillblk[txgoff] = 0; } if (dn->dn_next_indblkshift[txgoff] != 0) { ASSERT(dnp->dn_nlevels == 1); dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff]; dn->dn_next_indblkshift[txgoff] = 0; } /* * Just take the live (open-context) values for checksum and compress. * Strictly speaking it's a future leak, but nothing bad happens if we * start using the new checksum or compress algorithm a little early. 
*/ dnp->dn_checksum = dn->dn_checksum; dnp->dn_compress = dn->dn_compress; mutex_exit(&dn->dn_mtx); if (kill_spill) { free_blocks(dn, DN_SPILL_BLKPTR(dn->dn_phys), 1, tx); mutex_enter(&dn->dn_mtx); dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR; mutex_exit(&dn->dn_mtx); } /* process all the "freed" ranges in the file */ if (dn->dn_free_ranges[txgoff] != NULL) { dnode_sync_free_range_arg_t dsfra; dsfra.dsfra_dnode = dn; dsfra.dsfra_tx = tx; dsfra.dsfra_free_indirects = freeing_dnode; mutex_enter(&dn->dn_mtx); if (freeing_dnode) { ASSERT(range_tree_contains(dn->dn_free_ranges[txgoff], 0, dn->dn_maxblkid + 1)); } /* * Because dnode_sync_free_range() must drop dn_mtx during its * processing, using it as a callback to range_tree_vacate() is * not safe. No other operations (besides destroy) are allowed * once range_tree_vacate() has begun, and dropping dn_mtx * would leave a window open for another thread to observe that * invalid (and unsafe) state. */ range_tree_walk(dn->dn_free_ranges[txgoff], dnode_sync_free_range, &dsfra); range_tree_vacate(dn->dn_free_ranges[txgoff], NULL, NULL); range_tree_destroy(dn->dn_free_ranges[txgoff]); dn->dn_free_ranges[txgoff] = NULL; mutex_exit(&dn->dn_mtx); } if (freeing_dnode) { dn->dn_objset->os_freed_dnodes++; dnode_sync_free(dn, tx); return; } if (dn->dn_num_slots > DNODE_MIN_SLOTS) { dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; mutex_enter(&ds->ds_lock); ds->ds_feature_activation[SPA_FEATURE_LARGE_DNODE] = (void *)B_TRUE; mutex_exit(&ds->ds_lock); } if (dn->dn_next_nlevels[txgoff]) { dnode_increase_indirection(dn, tx); dn->dn_next_nlevels[txgoff] = 0; } /* * This must be done after dnode_sync_free_range() * and dnode_increase_indirection(). See dnode_new_blkid() * for an explanation of the high bit being set. 
*/ if (dn->dn_next_maxblkid[txgoff]) { mutex_enter(&dn->dn_mtx); dnp->dn_maxblkid = dn->dn_next_maxblkid[txgoff] & ~DMU_NEXT_MAXBLKID_SET; dn->dn_next_maxblkid[txgoff] = 0; mutex_exit(&dn->dn_mtx); } if (dn->dn_next_nblkptr[txgoff]) { /* this should only happen on a realloc */ ASSERT(dn->dn_allocated_txg == tx->tx_txg); if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) { /* zero the new blkptrs we are gaining */ bzero(dnp->dn_blkptr + dnp->dn_nblkptr, sizeof (blkptr_t) * (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr)); #ifdef ZFS_DEBUG } else { int i; ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr); /* the blkptrs we are losing better be unallocated */ for (i = 0; i < dnp->dn_nblkptr; i++) { if (i >= dn->dn_next_nblkptr[txgoff]) ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i])); } #endif } mutex_enter(&dn->dn_mtx); dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff]; dn->dn_next_nblkptr[txgoff] = 0; mutex_exit(&dn->dn_mtx); } dbuf_sync_list(list, dn->dn_phys->dn_nlevels - 1, tx); if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { ASSERT3P(list_head(list), ==, NULL); dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); } /* * Although we have dropped our reference to the dnode, it * can't be evicted until its written, and we haven't yet * initiated the IO for the dnode's dbuf. Additionally, the caller * has already added a reference to the dnode because it's on the * os_synced_dnodes list. */ } diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 26d4c2fe7e33..1ea184de338c 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -1,2868 +1,2861 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. 
* * CDDL HEADER END */ /* * Copyright (c) 2017, Datto, Inc. All rights reserved. * Copyright (c) 2018 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include /* * This file's primary purpose is for managing master encryption keys in * memory and on disk. For more info on how these keys are used, see the * block comment in zio_crypt.c. * * All master keys are stored encrypted on disk in the form of the DSL * Crypto Key ZAP object. The binary key data in this object is always * randomly generated and is encrypted with the user's wrapping key. This * layer of indirection allows the user to change their key without * needing to re-encrypt the entire dataset. The ZAP also holds on to the * (non-encrypted) encryption algorithm identifier, IV, and MAC needed to * safely decrypt the master key. For more info on the user's key see the * block comment in libzfs_crypto.c * * In-memory encryption keys are managed through the spa_keystore. The * keystore consists of 3 AVL trees, which are as follows: * * The Wrapping Key Tree: * The wrapping key (wkey) tree stores the user's keys that are fed into the * kernel through 'zfs load-key' and related commands. Datasets inherit their * parent's wkey by default, so these structures are refcounted. The wrapping * keys remain in memory until they are explicitly unloaded (with * "zfs unload-key"). Unloading is only possible when no datasets are using * them (refcount=0). * * The DSL Crypto Key Tree: * The DSL Crypto Keys (DCK) are the in-memory representation of decrypted * master keys. They are used by the functions in zio_crypt.c to perform * encryption, decryption, and authentication. Snapshots and clones of a given * dataset will share a DSL Crypto Key, so they are also refcounted. Once the * refcount on a key hits zero, it is immediately zeroed out and freed. * * The Crypto Key Mapping Tree: * The zio layer needs to lookup master keys by their dataset object id. 
Since
 * the DSL Crypto Keys can belong to multiple datasets, we maintain a tree of
 * dsl_key_mapping_t's which essentially just map the dataset object id to its
 * appropriate DSL Crypto Key. The management for creating and destroying these
 * mappings hooks into the code for owning and disowning datasets. Usually,
 * there will only be one active dataset owner, but there are times
 * (particularly during dataset creation and destruction) when this may not be
 * true or the dataset may not be initialized enough to own. As a result, this
 * object is also refcounted.
 */

/*
 * This tunable allows datasets to be raw received even if the stream does
 * not include IVset guids or if the guids don't match. This is used as part
 * of the resolution for ZPOOL_ERRATA_ZOL_8308_ENCRYPTION.
 */
int zfs_disable_ivset_guid_check = 0;

/* Add a reference, identified by tag, to the wrapping key's refcount. */
static void
dsl_wrapping_key_hold(dsl_wrapping_key_t *wkey, void *tag)
{
	(void) zfs_refcount_add(&wkey->wk_refcnt, tag);
}

/* Remove the reference identified by tag from the wrapping key's refcount. */
static void
dsl_wrapping_key_rele(dsl_wrapping_key_t *wkey, void *tag)
{
	(void) zfs_refcount_remove(&wkey->wk_refcnt, tag);
}

/*
 * Free a wrapping key that no longer has any references (asserted).
 * The raw key bytes are zeroed before their buffer is released.
 */
static void
dsl_wrapping_key_free(dsl_wrapping_key_t *wkey)
{
	ASSERT0(zfs_refcount_count(&wkey->wk_refcnt));

	if (wkey->wk_key.ck_data) {
		/* scrub the key material before handing the memory back */
		bzero(wkey->wk_key.ck_data,
		    CRYPTO_BITS2BYTES(wkey->wk_key.ck_length));
		kmem_free(wkey->wk_key.ck_data,
		    CRYPTO_BITS2BYTES(wkey->wk_key.ck_length));
	}

	zfs_refcount_destroy(&wkey->wk_refcnt);
	kmem_free(wkey, sizeof (dsl_wrapping_key_t));
}

/*
 * Allocate a wrapping key, copy WRAPPING_KEY_LEN bytes of wkeydata into it
 * and record keyformat, salt and iters. The new key is returned through
 * wkey_out.
 */
static void
dsl_wrapping_key_create(uint8_t *wkeydata, zfs_keyformat_t keyformat,
    uint64_t salt, uint64_t iters, dsl_wrapping_key_t **wkey_out)
{
	dsl_wrapping_key_t *wkey;

	/* allocate the wrapping key */
	wkey = kmem_alloc(sizeof (dsl_wrapping_key_t), KM_SLEEP);

	/* allocate and initialize the underlying crypto key */
	wkey->wk_key.ck_data = kmem_alloc(WRAPPING_KEY_LEN, KM_SLEEP);

	wkey->wk_key.ck_format = CRYPTO_KEY_RAW;
	wkey->wk_key.ck_length = CRYPTO_BYTES2BITS(WRAPPING_KEY_LEN);
	bcopy(wkeydata, wkey->wk_key.ck_data, WRAPPING_KEY_LEN);

	/*
initialize the rest of the struct */ zfs_refcount_create(&wkey->wk_refcnt); wkey->wk_keyformat = keyformat; wkey->wk_salt = salt; wkey->wk_iters = iters; *wkey_out = wkey; } int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out) { int ret; uint64_t crypt = ZIO_CRYPT_INHERIT; uint64_t keyformat = ZFS_KEYFORMAT_NONE; uint64_t salt = 0, iters = 0; dsl_crypto_params_t *dcp = NULL; dsl_wrapping_key_t *wkey = NULL; uint8_t *wkeydata = NULL; uint_t wkeydata_len = 0; char *keylocation = NULL; dcp = kmem_zalloc(sizeof (dsl_crypto_params_t), KM_SLEEP); dcp->cp_cmd = cmd; /* get relevant arguments from the nvlists */ if (props != NULL) { (void) nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt); (void) nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); (void) nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); (void) nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), &salt); (void) nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); dcp->cp_crypt = crypt; } if (crypto_args != NULL) { (void) nvlist_lookup_uint8_array(crypto_args, "wkeydata", &wkeydata, &wkeydata_len); } /* check for valid command */ if (dcp->cp_cmd >= DCP_CMD_MAX) { ret = SET_ERROR(EINVAL); goto error; } else { dcp->cp_cmd = cmd; } /* check for valid crypt */ if (dcp->cp_crypt >= ZIO_CRYPT_FUNCTIONS) { ret = SET_ERROR(EINVAL); goto error; } else { dcp->cp_crypt = crypt; } /* check for valid keyformat */ if (keyformat >= ZFS_KEYFORMAT_FORMATS) { ret = SET_ERROR(EINVAL); goto error; } /* check for a valid keylocation (of any kind) and copy it in */ if (keylocation != NULL) { if (!zfs_prop_valid_keylocation(keylocation, B_FALSE)) { ret = SET_ERROR(EINVAL); goto error; } dcp->cp_keylocation = spa_strdup(keylocation); } /* check wrapping key length, if given */ if (wkeydata != NULL && wkeydata_len != WRAPPING_KEY_LEN) { ret = 
SET_ERROR(EINVAL); goto error; } /* if the user asked for the default crypt, determine that now */ if (dcp->cp_crypt == ZIO_CRYPT_ON) dcp->cp_crypt = ZIO_CRYPT_ON_VALUE; /* create the wrapping key from the raw data */ if (wkeydata != NULL) { /* create the wrapping key with the verified parameters */ dsl_wrapping_key_create(wkeydata, keyformat, salt, iters, &wkey); dcp->cp_wkey = wkey; } /* * Remove the encryption properties from the nvlist since they are not * maintained through the DSL. */ (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)); (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT)); (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT)); (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS)); *dcp_out = dcp; return (0); error: kmem_free(dcp, sizeof (dsl_crypto_params_t)); *dcp_out = NULL; return (ret); } void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload) { if (dcp == NULL) return; if (dcp->cp_keylocation != NULL) spa_strfree(dcp->cp_keylocation); if (unload && dcp->cp_wkey != NULL) dsl_wrapping_key_free(dcp->cp_wkey); kmem_free(dcp, sizeof (dsl_crypto_params_t)); } static int spa_crypto_key_compare(const void *a, const void *b) { const dsl_crypto_key_t *dcka = a; const dsl_crypto_key_t *dckb = b; if (dcka->dck_obj < dckb->dck_obj) return (-1); if (dcka->dck_obj > dckb->dck_obj) return (1); return (0); } static int spa_key_mapping_compare(const void *a, const void *b) { const dsl_key_mapping_t *kma = a; const dsl_key_mapping_t *kmb = b; if (kma->km_dsobj < kmb->km_dsobj) return (-1); if (kma->km_dsobj > kmb->km_dsobj) return (1); return (0); } static int spa_wkey_compare(const void *a, const void *b) { const dsl_wrapping_key_t *wka = a; const dsl_wrapping_key_t *wkb = b; if (wka->wk_ddobj < wkb->wk_ddobj) return (-1); if (wka->wk_ddobj > wkb->wk_ddobj) return (1); return (0); } void spa_keystore_init(spa_keystore_t *sk) { rw_init(&sk->sk_dk_lock, NULL, 
RW_DEFAULT, NULL);
	rw_init(&sk->sk_km_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&sk->sk_wkeys_lock, NULL, RW_DEFAULT, NULL);
	avl_create(&sk->sk_dsl_keys, spa_crypto_key_compare,
	    sizeof (dsl_crypto_key_t),
	    offsetof(dsl_crypto_key_t, dck_avl_link));
	avl_create(&sk->sk_key_mappings, spa_key_mapping_compare,
	    sizeof (dsl_key_mapping_t),
	    offsetof(dsl_key_mapping_t, km_avl_link));
	avl_create(&sk->sk_wkeys, spa_wkey_compare, sizeof (dsl_wrapping_key_t),
	    offsetof(dsl_wrapping_key_t, wk_avl_link));
}

/*
 * Tear down a keystore. The DSL key and key mapping trees must already be
 * empty (asserted); any wrapping keys still loaded are freed here.
 */
void
spa_keystore_fini(spa_keystore_t *sk)
{
	dsl_wrapping_key_t *wkey;
	void *cookie = NULL;

	ASSERT(avl_is_empty(&sk->sk_dsl_keys));
	ASSERT(avl_is_empty(&sk->sk_key_mappings));

	/* free every wrapping key that is still in the tree */
	while ((wkey = avl_destroy_nodes(&sk->sk_wkeys, &cookie)) != NULL)
		dsl_wrapping_key_free(wkey);

	avl_destroy(&sk->sk_wkeys);
	avl_destroy(&sk->sk_key_mappings);
	avl_destroy(&sk->sk_dsl_keys);
	rw_destroy(&sk->sk_wkeys_lock);
	rw_destroy(&sk->sk_km_lock);
	rw_destroy(&sk->sk_dk_lock);
}

/*
 * Look up the encryption root dsl dir object recorded in dd's crypto key
 * object. Returns ENOENT if dd has no crypto key object, otherwise the
 * result of the ZAP lookup.
 */
static int
dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj)
{
	if (dd->dd_crypto_obj == 0)
		return (SET_ERROR(ENOENT));

	return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
	    DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj));
}

/*
 * Fetch the on-disk key version for dd into *version. Returns ENOENT if dd
 * has no crypto key object. A failed version lookup is not an error:
 * *version is then left at 0.
 */
static int
dsl_dir_get_encryption_version(dsl_dir_t *dd, uint64_t *version)
{
	*version = 0;

	if (dd->dd_crypto_obj == 0)
		return (SET_ERROR(ENOENT));

	/* version 0 is implied by ENOENT */
	(void) zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
	    DSL_CRYPTO_KEY_VERSION, 8, 1, version);

	return (0);
}

/*
 * Returns B_TRUE if dd has a crypto key object whose version differs from
 * ZIO_CRYPT_KEY_CURRENT_VERSION, and B_FALSE otherwise (including when dd
 * has no crypto key object).
 */
boolean_t
dsl_dir_incompatible_encryption_version(dsl_dir_t *dd)
{
	int ret;
	uint64_t version = 0;

	ret = dsl_dir_get_encryption_version(dd, &version);
	if (ret != 0)
		return (B_FALSE);

	return (version != ZIO_CRYPT_KEY_CURRENT_VERSION);
}

/*
 * Find the wrapping key loaded for ddobj and take a hold on it for tag.
 * The caller must already hold sk_wkeys_lock (asserted).
 */
static int
spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj,
    void *tag, dsl_wrapping_key_t **wkey_out)
{
	int ret;
	dsl_wrapping_key_t search_wkey;
	dsl_wrapping_key_t *found_wkey;

	ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_wkeys_lock));

	/* init the
search wrapping key */
	search_wkey.wk_ddobj = ddobj;

	/* lookup the wrapping key */
	found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &search_wkey, NULL);
	if (!found_wkey) {
		ret = SET_ERROR(ENOENT);
		goto error;
	}

	/* increment the refcount */
	dsl_wrapping_key_hold(found_wkey, tag);

	*wkey_out = found_wkey;
	return (0);

error:
	*wkey_out = NULL;
	return (ret);
}

/*
 * Hold the wrapping key of dd's encryption root for tag. sk_wkeys_lock is
 * taken as reader here unless the caller already holds it as writer, and
 * is dropped again before returning only if it was taken here.
 */
static int
spa_keystore_wkey_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag,
    dsl_wrapping_key_t **wkey_out)
{
	int ret;
	dsl_wrapping_key_t *wkey;
	uint64_t rddobj;
	boolean_t locked = B_FALSE;

	if (!RW_WRITE_HELD(&spa->spa_keystore.sk_wkeys_lock)) {
		rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_READER);
		locked = B_TRUE;
	}

	/* get the ddobj that the keylocation property was inherited from */
	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
	if (ret != 0)
		goto error;

	/* lookup the wkey in the avl tree */
	ret = spa_keystore_wkey_hold_ddobj_impl(spa, rddobj, tag, &wkey);
	if (ret != 0)
		goto error;

	/* unlock the wkey tree if we locked it */
	if (locked)
		rw_exit(&spa->spa_keystore.sk_wkeys_lock);

	*wkey_out = wkey;
	return (0);

error:
	if (locked)
		rw_exit(&spa->spa_keystore.sk_wkeys_lock);

	*wkey_out = NULL;
	return (ret);
}

/*
 * Check whether keylocation may be set on the dataset named dsname.
 * An unencrypted dataset accepts only "none" (EACCES otherwise). An
 * encrypted dataset needs a valid keylocation (EINVAL otherwise) and must
 * be its own encryption root (EACCES otherwise).
 */
int
dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation)
{
	int ret = 0;
	dsl_dir_t *dd = NULL;
	dsl_pool_t *dp = NULL;
	uint64_t rddobj;

	/* hold the dsl dir */
	ret = dsl_pool_hold(dsname, FTAG, &dp);
	if (ret != 0)
		goto out;

	ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
	if (ret != 0) {
		dd = NULL;
		goto out;
	}

	/* if dd is not encrypted, the value may only be "none" */
	if (dd->dd_crypto_obj == 0) {
		if (strcmp(keylocation, "none") != 0) {
			ret = SET_ERROR(EACCES);
			goto out;
		}

		ret = 0;
		goto out;
	}

	/* check for a valid keylocation for encrypted datasets */
	if (!zfs_prop_valid_keylocation(keylocation, B_TRUE)) {
		ret = SET_ERROR(EINVAL);
		goto out;
	}

	/* check that this is an encryption root */
	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
	if (ret != 0)
		goto out;

	if (rddobj !=
dd->dd_object) {
		ret = SET_ERROR(EACCES);
		goto out;
	}

	dsl_dir_rele(dd, FTAG);
	dsl_pool_rele(dp, FTAG);

	return (0);

out:
	if (dd != NULL)
		dsl_dir_rele(dd, FTAG);
	if (dp != NULL)
		dsl_pool_rele(dp, FTAG);

	return (ret);
}

/*
 * Destroy a DSL Crypto Key that has no remaining holds (asserted) and drop
 * the reference it keeps on its wrapping key, if any.
 */
static void
dsl_crypto_key_free(dsl_crypto_key_t *dck)
{
	ASSERT(zfs_refcount_count(&dck->dck_holds) == 0);

	/* destroy the zio_crypt_key_t */
	zio_crypt_key_destroy(&dck->dck_key);

	/* free the refcount, wrapping key, and lock */
	zfs_refcount_destroy(&dck->dck_holds);
	if (dck->dck_wkey)
		dsl_wrapping_key_rele(dck->dck_wkey, dck);

	/* free the key */
	kmem_free(dck, sizeof (dsl_crypto_key_t));
}

/*
 * Drop tag's hold on dck and free the key once no holds remain. This does
 * not touch the keystore's AVL tree.
 */
static void
dsl_crypto_key_rele(dsl_crypto_key_t *dck, void *tag)
{
	if (zfs_refcount_remove(&dck->dck_holds, tag) == 0)
		dsl_crypto_key_free(dck);
}

/*
 * Read the wrapped key material for dckobj from the MOS and unwrap it with
 * wkey. On success the new key is returned in *dck_out with one hold for
 * tag, and it holds its own reference on wkey.
 */
static int
dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey,
    uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out)
{
	int ret;
	uint64_t crypt = 0, guid = 0, version = 0;
	uint8_t raw_keydata[MASTER_KEY_MAX_LEN];
	uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN];
	uint8_t iv[WRAPPING_IV_LEN];
	uint8_t mac[WRAPPING_MAC_LEN];
	dsl_crypto_key_t *dck;

	/* allocate and initialize the key */
	dck = kmem_zalloc(sizeof (dsl_crypto_key_t), KM_SLEEP);

	/* fetch all of the values we need from the ZAP */
	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
	    &crypt);
	if (ret != 0)
		goto error;

	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &guid);
	if (ret != 0)
		goto error;

	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1,
	    MASTER_KEY_MAX_LEN, raw_keydata);
	if (ret != 0)
		goto error;

	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1,
	    SHA512_HMAC_KEYLEN, raw_hmac_keydata);
	if (ret != 0)
		goto error;

	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN,
	    iv);
	if (ret != 0)
		goto error;

	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN,
	    mac);
	if (ret != 0)
		goto error;

	/* the initial on-disk format for encryption did not have a version */
	(void) zap_lookup(mos, dckobj,
DSL_CRYPTO_KEY_VERSION, 8, 1, &version);

	/*
	 * Unwrap the keys. If there is an error return EACCES to indicate
	 * an authentication failure.
	 */
	ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, version, guid,
	    raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key);
	if (ret != 0) {
		ret = SET_ERROR(EACCES);
		goto error;
	}

	/* finish initializing the dsl_crypto_key_t */
	zfs_refcount_create(&dck->dck_holds);
	dsl_wrapping_key_hold(wkey, dck);
	dck->dck_wkey = wkey;
	dck->dck_obj = dckobj;
	/* the caller's hold: the key is returned with a hold count of one */
	zfs_refcount_add(&dck->dck_holds, tag);

	*dck_out = dck;
	return (0);

error:
	if (dck != NULL) {
		bzero(dck, sizeof (dsl_crypto_key_t));
		kmem_free(dck, sizeof (dsl_crypto_key_t));
	}

	*dck_out = NULL;
	return (ret);
}

/*
 * Find the DSL Crypto Key for dckobj in the keystore and take a hold on it
 * for tag. Returns ENOENT if it is not loaded. The caller must already
 * hold sk_dk_lock (asserted).
 */
static int
spa_keystore_dsl_key_hold_impl(spa_t *spa, uint64_t dckobj, void *tag,
    dsl_crypto_key_t **dck_out)
{
	int ret;
	dsl_crypto_key_t search_dck;
	dsl_crypto_key_t *found_dck;

	ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_dk_lock));

	/* init the search key */
	search_dck.dck_obj = dckobj;

	/* find the matching key in the keystore */
	found_dck = avl_find(&spa->spa_keystore.sk_dsl_keys, &search_dck, NULL);
	if (!found_dck) {
		ret = SET_ERROR(ENOENT);
		goto error;
	}

	/* increment the refcount */
	zfs_refcount_add(&found_dck->dck_holds, tag);

	*dck_out = found_dck;
	return (0);

error:
	*dck_out = NULL;
	return (ret);
}

/*
 * Hold the DSL Crypto Key for dd on behalf of tag, reading and unwrapping
 * it from disk if it is not already in the keystore. Returns EACCES if the
 * wrapping key for dd is not loaded.
 */
static int
spa_keystore_dsl_key_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag,
    dsl_crypto_key_t **dck_out)
{
	int ret;
	avl_index_t where;
	dsl_crypto_key_t *dck_io = NULL, *dck_ks = NULL;
	dsl_wrapping_key_t *wkey = NULL;
	uint64_t dckobj = dd->dd_crypto_obj;

	/* Lookup the key in the tree of currently loaded keys */
	rw_enter(&spa->spa_keystore.sk_dk_lock, RW_READER);
	ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck_ks);
	rw_exit(&spa->spa_keystore.sk_dk_lock);
	if (ret == 0) {
		*dck_out = dck_ks;
		return (0);
	}

	/* Lookup the wrapping key from the keystore */
	ret = spa_keystore_wkey_hold_dd(spa, dd, FTAG, &wkey);
	if (ret != 0) {
		*dck_out = NULL;
		return (SET_ERROR(EACCES));
	}

	/* Read the
key from disk */ ret = dsl_crypto_key_open(spa->spa_meta_objset, wkey, dckobj, tag, &dck_io); if (ret != 0) { dsl_wrapping_key_rele(wkey, FTAG); *dck_out = NULL; return (ret); } /* * Add the key to the keystore. It may already exist if it was * added while performing the read from disk. In this case discard * it and return the key from the keystore. */ rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck_ks); if (ret != 0) { avl_find(&spa->spa_keystore.sk_dsl_keys, dck_io, &where); avl_insert(&spa->spa_keystore.sk_dsl_keys, dck_io, where); *dck_out = dck_io; } else { dsl_crypto_key_free(dck_io); *dck_out = dck_ks; } /* Release the wrapping key (the dsl key now has a reference to it) */ dsl_wrapping_key_rele(wkey, FTAG); rw_exit(&spa->spa_keystore.sk_dk_lock); return (0); } void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag) { rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); if (zfs_refcount_remove(&dck->dck_holds, tag) == 0) { avl_remove(&spa->spa_keystore.sk_dsl_keys, dck); dsl_crypto_key_free(dck); } rw_exit(&spa->spa_keystore.sk_dk_lock); } int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey) { int ret; avl_index_t where; dsl_wrapping_key_t *found_wkey; rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); /* insert the wrapping key into the keystore */ found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); if (found_wkey != NULL) { ret = SET_ERROR(EEXIST); goto error_unlock; } avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); rw_exit(&spa->spa_keystore.sk_wkeys_lock); return (0); error_unlock: rw_exit(&spa->spa_keystore.sk_wkeys_lock); return (ret); } int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, boolean_t noop) { int ret; dsl_dir_t *dd = NULL; dsl_crypto_key_t *dck = NULL; dsl_wrapping_key_t *wkey = dcp->cp_wkey; dsl_pool_t *dp = NULL; uint64_t rddobj, keyformat, salt, iters; /* * We don't validate the wrapping 
key's keyformat, salt, or iters
	 * since they will never be needed after the DCK has been wrapped.
	 */
	if (dcp->cp_wkey == NULL ||
	    dcp->cp_cmd != DCP_CMD_NONE ||
	    dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
	    dcp->cp_keylocation != NULL)
		return (SET_ERROR(EINVAL));

	ret = dsl_pool_hold(dsname, FTAG, &dp);
	if (ret != 0)
		goto error;

	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
		ret = SET_ERROR(ENOTSUP);
		goto error;
	}

	/* hold the dsl dir */
	ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
	if (ret != 0) {
		dd = NULL;
		goto error;
	}

	/* confirm that dd is the encryption root */
	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
	if (ret != 0 || rddobj != dd->dd_object) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/* initialize the wkey's ddobj */
	wkey->wk_ddobj = dd->dd_object;

	/* verify that the wkey is correct by opening its dsl key */
	ret = dsl_crypto_key_open(dp->dp_meta_objset, wkey,
	    dd->dd_crypto_obj, FTAG, &dck);
	if (ret != 0)
		goto error;

	/* initialize the wkey encryption parameters from the DSL Crypto Key */
	ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj,
	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &keyformat);
	if (ret != 0)
		goto error;

	ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj,
	    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt);
	if (ret != 0)
		goto error;

	ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj,
	    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters);
	if (ret != 0)
		goto error;

	/* salt and iters are expected to be non-zero only for passphrases */
	ASSERT3U(keyformat, <, ZFS_KEYFORMAT_FORMATS);
	ASSERT3U(keyformat, !=, ZFS_KEYFORMAT_NONE);
	IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, iters != 0);
	IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, salt != 0);
	IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, iters == 0);
	IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, salt == 0);

	wkey->wk_keyformat = keyformat;
	wkey->wk_salt = salt;
	wkey->wk_iters = iters;

	/*
	 * At this point we have verified the wkey and confirmed that it can
	 * be used to decrypt a DSL Crypto Key. We can simply cleanup and
	 * return if this is all the user wanted to do.
	 */
	/* ret is 0 here, so the shared cleanup path returns success */
	if (noop)
		goto error;

	/* insert the wrapping key into the keystore */
	ret = spa_keystore_load_wkey_impl(dp->dp_spa, wkey);
	if (ret != 0)
		goto error;

	dsl_crypto_key_rele(dck, FTAG);
	dsl_dir_rele(dd, FTAG);
	dsl_pool_rele(dp, FTAG);

	/* create any zvols under this ds */
	zvol_create_minors_recursive(dsname);

	return (0);

error:
	if (dck != NULL)
		dsl_crypto_key_rele(dck, FTAG);
	if (dd != NULL)
		dsl_dir_rele(dd, FTAG);
	if (dp != NULL)
		dsl_pool_rele(dp, FTAG);

	return (ret);
}

/*
 * Remove the wrapping key for ddobj from the keystore and free it.
 * Returns EACCES if no such key is loaded and EBUSY if it still has
 * references.
 */
int
spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj)
{
	int ret;
	dsl_wrapping_key_t search_wkey;
	dsl_wrapping_key_t *found_wkey;

	/* init the search wrapping key */
	search_wkey.wk_ddobj = ddobj;

	rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER);

	/* remove the wrapping key from the keystore */
	found_wkey = avl_find(&spa->spa_keystore.sk_wkeys,
	    &search_wkey, NULL);
	if (!found_wkey) {
		ret = SET_ERROR(EACCES);
		goto error_unlock;
	} else if (zfs_refcount_count(&found_wkey->wk_refcnt) != 0) {
		ret = SET_ERROR(EBUSY);
		goto error_unlock;
	}
	avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey);

	rw_exit(&spa->spa_keystore.sk_wkeys_lock);

	/* free the wrapping key */
	dsl_wrapping_key_free(found_wkey);

	return (0);

error_unlock:
	rw_exit(&spa->spa_keystore.sk_wkeys_lock);
	return (ret);
}

/*
 * Unload the wrapping key of the dataset named dsname from the keystore.
 */
int
spa_keystore_unload_wkey(const char *dsname)
{
	int ret = 0;
	dsl_dir_t *dd = NULL;
	dsl_pool_t *dp = NULL;
	spa_t *spa = NULL;

	ret = spa_open(dsname, &spa, FTAG);
	if (ret != 0)
		return (ret);

	/*
	 * Wait for any outstanding txg IO to complete, releasing any
	 * remaining references on the wkey.
*/
	if (spa_mode(spa) != SPA_MODE_READ)
		txg_wait_synced(spa->spa_dsl_pool, 0);

	spa_close(spa, FTAG);

	/* hold the dsl dir */
	ret = dsl_pool_hold(dsname, FTAG, &dp);
	if (ret != 0)
		goto error;

	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
		ret = (SET_ERROR(ENOTSUP));
		goto error;
	}

	ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
	if (ret != 0) {
		dd = NULL;
		goto error;
	}

	/* unload the wkey */
	ret = spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
	if (ret != 0)
		goto error;

	dsl_dir_rele(dd, FTAG);
	dsl_pool_rele(dp, FTAG);

	/* remove any zvols under this ds */
	/*
	 * NOTE(review): dp is dereferenced here after dsl_pool_rele() has
	 * dropped our hold on it -- confirm the pool cannot go away between
	 * the release and this call.
	 */
	zvol_remove_minors(dp->dp_spa, dsname, B_TRUE);

	return (0);

error:
	if (dd != NULL)
		dsl_dir_rele(dd, FTAG);
	if (dp != NULL)
		dsl_pool_rele(dp, FTAG);

	return (ret);
}

/*
 * Add a reference for tag to a key mapping that already has at least one
 * reference (asserted).
 */
void
key_mapping_add_ref(dsl_key_mapping_t *km, void *tag)
{
	ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
	zfs_refcount_add(&km->km_refcnt, tag);
}

/*
 * The locking here is a little tricky to ensure we don't cause unnecessary
 * performance problems. We want to release a key mapping whenever someone
 * decrements the refcount to 0, but freeing the mapping requires removing
 * it from the spa_keystore, which requires holding sk_km_lock as a writer.
 * Most of the time we don't want to hold this lock as a writer, since the
 * same lock is held as a reader for each IO that needs to encrypt / decrypt
 * data for any dataset and in practice we will only actually free the
 * mapping after unmounting a dataset.
 */
void
key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag)
{
	ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);

	/* fast path: other references remain, so nothing needs freeing */
	if (zfs_refcount_remove(&km->km_refcnt, tag) != 0)
		return;

	/*
	 * We think we are going to need to free the mapping. Add a
	 * reference to prevent most other releasers from thinking
	 * this might be their responsibility. This is inherently
	 * racy, so we will confirm that we are legitimately the
	 * last holder once we have the sk_km_lock as a writer.
*/
	zfs_refcount_add(&km->km_refcnt, FTAG);

	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
	/* someone else took a reference in the meantime: leave it alone */
	if (zfs_refcount_remove(&km->km_refcnt, FTAG) != 0) {
		rw_exit(&spa->spa_keystore.sk_km_lock);
		return;
	}

	avl_remove(&spa->spa_keystore.sk_key_mappings, km);
	rw_exit(&spa->spa_keystore.sk_km_lock);

	/* the mapping itself is the tag for its hold on the DSL key */
	spa_keystore_dsl_key_rele(spa, km->km_key, km);
	zfs_refcount_destroy(&km->km_refcnt);
	kmem_free(km, sizeof (dsl_key_mapping_t));
}

/*
 * Create a key mapping for ds, or add a reference for tag to the existing
 * one. If km_out is not NULL it receives the mapping that now carries the
 * reference, or NULL on failure.
 */
int
spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag,
    dsl_key_mapping_t **km_out)
{
	int ret;
	avl_index_t where;
	dsl_key_mapping_t *km, *found_km;
	boolean_t should_free = B_FALSE;

	/* Allocate and initialize the mapping */
	km = kmem_zalloc(sizeof (dsl_key_mapping_t), KM_SLEEP);
	zfs_refcount_create(&km->km_refcnt);

	ret = spa_keystore_dsl_key_hold_dd(spa, ds->ds_dir, km, &km->km_key);
	if (ret != 0) {
		zfs_refcount_destroy(&km->km_refcnt);
		kmem_free(km, sizeof (dsl_key_mapping_t));

		if (km_out != NULL)
			*km_out = NULL;
		return (ret);
	}

	km->km_dsobj = ds->ds_object;

	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);

	/*
	 * If a mapping already exists, simply increment its refcount and
	 * cleanup the one we made. We want to allocate / free outside of
	 * the lock because this lock is also used by the zio layer to lookup
	 * key mappings. Otherwise, use the one we created. Normally, there will
	 * only be one active reference at a time (the objset owner), but there
	 * are times when there could be multiple async users.
*/
	found_km = avl_find(&spa->spa_keystore.sk_key_mappings, km, &where);
	if (found_km != NULL) {
		should_free = B_TRUE;
		zfs_refcount_add(&found_km->km_refcnt, tag);
		if (km_out != NULL)
			*km_out = found_km;
	} else {
		zfs_refcount_add(&km->km_refcnt, tag);
		avl_insert(&spa->spa_keystore.sk_key_mappings, km, where);
		if (km_out != NULL)
			*km_out = km;
	}

	rw_exit(&spa->spa_keystore.sk_km_lock);

	/* discard our unused mapping now that the lock has been dropped */
	if (should_free) {
		spa_keystore_dsl_key_rele(spa, km->km_key, km);
		zfs_refcount_destroy(&km->km_refcnt);
		kmem_free(km, sizeof (dsl_key_mapping_t));
	}

	return (0);
}

/*
 * Drop tag's reference on the key mapping for dsobj. Returns ENOENT if no
 * mapping exists for dsobj.
 */
int
spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
{
	int ret;
	dsl_key_mapping_t search_km;
	dsl_key_mapping_t *found_km;

	/* init the search key mapping */
	search_km.km_dsobj = dsobj;

	rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);

	/* find the matching mapping */
	found_km = avl_find(&spa->spa_keystore.sk_key_mappings,
	    &search_km, NULL);
	if (found_km == NULL) {
		ret = SET_ERROR(ENOENT);
		goto error_unlock;
	}

	rw_exit(&spa->spa_keystore.sk_km_lock);

	/* key_mapping_rele() takes sk_km_lock itself if it has to free */
	key_mapping_rele(spa, found_km, tag);

	return (0);

error_unlock:
	rw_exit(&spa->spa_keystore.sk_km_lock);
	return (ret);
}

/*
 * This function is primarily used by the zio and arc layer to lookup
 * DSL Crypto Keys for encryption. Callers must release the key with
 * spa_keystore_dsl_key_rele(). The function may also be called with
 * dck_out == NULL and tag == NULL to simply check that a key exists
 * without getting a reference to it.
*/
int
spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
    dsl_crypto_key_t **dck_out)
{
	int ret;
	dsl_key_mapping_t search_km;
	dsl_key_mapping_t *found_km;
	dsl_crypto_key_t *dck;

	ASSERT((tag != NULL && dck_out != NULL) ||
	    (tag == NULL && dck_out == NULL));

	/* init the search key mapping */
	search_km.km_dsobj = dsobj;

	rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);

	/* find the mapping for this dataset in the tree */
	found_km = avl_find(&spa->spa_keystore.sk_key_mappings,
	    &search_km, NULL);
	if (found_km == NULL) {
		ret = SET_ERROR(ENOENT);
		goto error_unlock;
	}

	/*
	 * Read the key pointer while sk_km_lock is still held. Freeing a
	 * mapping requires this lock as a writer, so found_km must not be
	 * dereferenced once the lock has been dropped.
	 */
	dck = found_km->km_key;
	if (tag)
		zfs_refcount_add(&dck->dck_holds, tag);

	rw_exit(&spa->spa_keystore.sk_km_lock);

	if (dck_out != NULL)
		*dck_out = dck;
	return (0);

error_unlock:
	rw_exit(&spa->spa_keystore.sk_km_lock);

	if (dck_out != NULL)
		*dck_out = NULL;
	return (ret);
}

/*
 * Returns 0 if the wrapping key for dd's encryption root is loaded in the
 * keystore and EACCES otherwise. No reference is kept on the key.
 */
static int
dmu_objset_check_wkey_loaded(dsl_dir_t *dd)
{
	int ret;
	dsl_wrapping_key_t *wkey = NULL;

	ret = spa_keystore_wkey_hold_dd(dd->dd_pool->dp_spa, dd, FTAG,
	    &wkey);
	if (ret != 0)
		return (SET_ERROR(EACCES));

	/* the hold was only needed to prove that the key is present */
	dsl_wrapping_key_rele(wkey, FTAG);

	return (0);
}

/*
 * Report the key status of dd: NONE when it has no DSL key object,
 * otherwise AVAILABLE or UNAVAILABLE depending on whether its wrapping key
 * is loaded.
 */
static zfs_keystatus_t
dsl_dataset_get_keystatus(dsl_dir_t *dd)
{
	/* check if this dd has a dsl key */
	if (dd->dd_crypto_obj == 0)
		return (ZFS_KEYSTATUS_NONE);

	return (dmu_objset_check_wkey_loaded(dd) == 0 ?
ZFS_KEYSTATUS_AVAILABLE : ZFS_KEYSTATUS_UNAVAILABLE);
}

/*
 * Store the encryption suite of 'dd' in *crypt. A dsl dir without a crypto
 * object yields ZIO_CRYPT_OFF; otherwise the DSL_CRYPTO_KEY_CRYPTO_SUITE
 * entry of its crypto object is read from the MOS. Returns 0 or the error
 * from zap_lookup().
 */
static int
dsl_dir_get_crypt(dsl_dir_t *dd, uint64_t *crypt)
{
	if (dd->dd_crypto_obj == 0) {
		*crypt = ZIO_CRYPT_OFF;
		return (0);
	}

	return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
	    DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, crypt));
}

/*
 * Write all fields of a DSL Crypto Key into ZAP object 'dckobj' of 'mos'
 * as part of 'tx'. Scalar values are stored as single 8-byte integers; iv,
 * mac, keydata and hmac_keydata are stored as byte arrays of
 * WRAPPING_IV_LEN, WRAPPING_MAC_LEN, MASTER_KEY_MAX_LEN and
 * SHA512_HMAC_KEYLEN bytes respectively. Every update is wrapped in
 * VERIFY0(), so a failing zap_update() is fatal.
 */
static void
dsl_crypto_key_sync_impl(objset_t *mos, uint64_t dckobj, uint64_t crypt,
    uint64_t root_ddobj, uint64_t guid, uint8_t *iv, uint8_t *mac,
    uint8_t *keydata, uint8_t *hmac_keydata, uint64_t keyformat,
    uint64_t salt, uint64_t iters, dmu_tx_t *tx)
{
	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
	    &crypt, tx));
	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1,
	    &root_ddobj, tx));
	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1,
	    &guid, tx));
	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN,
	    iv, tx));
	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN,
	    mac, tx));
	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1,
	    MASTER_KEY_MAX_LEN, keydata, tx));
	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1,
	    SHA512_HMAC_KEYLEN, hmac_keydata, tx));
	VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT),
	    8, 1, &keyformat, tx));
	VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT),
	    8, 1, &salt, tx));
	VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS),
	    8, 1, &iters, tx));
}

/*
 * Wrap the key held in 'dck' with dck's wrapping key and write the result to
 * dck->dck_obj. Must be called in syncing context (asserted).
 */
static void
dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx)
{
	zio_crypt_key_t *key = &dck->dck_key;
	dsl_wrapping_key_t *wkey = dck->dck_wkey;
	uint8_t keydata[MASTER_KEY_MAX_LEN];
	uint8_t hmac_keydata[SHA512_HMAC_KEYLEN];
	uint8_t iv[WRAPPING_IV_LEN];
	uint8_t mac[WRAPPING_MAC_LEN];

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT3U(key->zk_crypt, <, ZIO_CRYPT_FUNCTIONS);

	/* encrypt and store the keys along with the IV and MAC */
	VERIFY0(zio_crypt_key_wrap(&dck->dck_wkey->wk_key, key, iv, mac,
	    keydata, hmac_keydata));

	/* update the ZAP with the obtained values
*/
	dsl_crypto_key_sync_impl(tx->tx_pool->dp_meta_objset, dck->dck_obj,
	    key->zk_crypt, wkey->wk_ddobj, key->zk_guid, iv, mac, keydata,
	    hmac_keydata, wkey->wk_keyformat, wkey->wk_salt, wkey->wk_iters,
	    tx);
}

/*
 * Argument bundle for the change-key sync task: the dataset name and the
 * crypto parameters describing the requested change.
 */
typedef struct spa_keystore_change_key_args {
	const char *skcka_dsname;
	dsl_crypto_params_t *skcka_cp;
} spa_keystore_change_key_args_t;

/*
 * Check callback of the change-key sync task. 'arg' is a
 * spa_keystore_change_key_args_t. Validates the requested command against
 * the state of the named dsl dir and returns 0 if the change may proceed or
 * an errno (ENOTSUP, EINVAL, or an error from a helper) otherwise. The hold
 * taken on the dsl dir is released on every return path.
 */
static int
spa_keystore_change_key_check(void *arg, dmu_tx_t *tx)
{
	int ret;
	dsl_dir_t *dd = NULL;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	spa_keystore_change_key_args_t *skcka = arg;
	dsl_crypto_params_t *dcp = skcka->skcka_cp;
	uint64_t rddobj;

	/* check for the encryption feature */
	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
		ret = SET_ERROR(ENOTSUP);
		goto error;
	}

	/* check for valid key change command */
	if (dcp->cp_cmd != DCP_CMD_NEW_KEY &&
	    dcp->cp_cmd != DCP_CMD_INHERIT &&
	    dcp->cp_cmd != DCP_CMD_FORCE_NEW_KEY &&
	    dcp->cp_cmd != DCP_CMD_FORCE_INHERIT) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/* hold the dd */
	ret = dsl_dir_hold(dp, skcka->skcka_dsname, FTAG, &dd, NULL);
	if (ret != 0) {
		/* make sure the error path does not release a failed hold */
		dd = NULL;
		goto error;
	}

	/* verify that the dataset is encrypted */
	if (dd->dd_crypto_obj == 0) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/* clones must always use their origin's key */
	if (dsl_dir_is_clone(dd)) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/* lookup the ddobj we are inheriting the keylocation from */
	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
	if (ret != 0)
		goto error;

	/* Handle inheritance */
	if (dcp->cp_cmd == DCP_CMD_INHERIT ||
	    dcp->cp_cmd == DCP_CMD_FORCE_INHERIT) {
		/* no other encryption params should be given */
		if (dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
		    dcp->cp_keylocation != NULL ||
		    dcp->cp_wkey != NULL) {
			ret = SET_ERROR(EINVAL);
			goto error;
		}

		/* check that this is an encryption root */
		if (dd->dd_object != rddobj) {
			ret = SET_ERROR(EINVAL);
			goto error;
		}

		/* check that the parent is encrypted */
		if (dd->dd_parent->dd_crypto_obj == 0) {
			ret = SET_ERROR(EINVAL);
			goto error;
		}

		/* if we are rewrapping check
that both keys are loaded */
		if (dcp->cp_cmd == DCP_CMD_INHERIT) {
			ret = dmu_objset_check_wkey_loaded(dd);
			if (ret != 0)
				goto error;

			ret = dmu_objset_check_wkey_loaded(dd->dd_parent);
			if (ret != 0)
				goto error;
		}

		dsl_dir_rele(dd, FTAG);
		return (0);
	}

	/* handle forcing an encryption root without rewrapping */
	if (dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) {
		/* no other encryption params should be given */
		if (dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
		    dcp->cp_keylocation != NULL ||
		    dcp->cp_wkey != NULL) {
			ret = SET_ERROR(EINVAL);
			goto error;
		}

		/* check that this is not an encryption root */
		if (dd->dd_object == rddobj) {
			ret = SET_ERROR(EINVAL);
			goto error;
		}

		dsl_dir_rele(dd, FTAG);
		return (0);
	}

	/* crypt cannot be changed after creation */
	if (dcp->cp_crypt != ZIO_CRYPT_INHERIT) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/* we are not inheriting our parent's wkey so we need one ourselves */
	if (dcp->cp_wkey == NULL) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/* check for a valid keyformat for the new wrapping key */
	if (dcp->cp_wkey->wk_keyformat >= ZFS_KEYFORMAT_FORMATS ||
	    dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_NONE) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/*
	 * If this dataset is not currently an encryption root we need a new
	 * keylocation for this dataset's new wrapping key. Otherwise we can
	 * just keep the one we already had.
	 */
	if (dd->dd_object != rddobj && dcp->cp_keylocation == NULL) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/* check that the keylocation is valid if it is not NULL */
	if (dcp->cp_keylocation != NULL &&
	    !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) {
		ret = SET_ERROR(EINVAL);
		goto error;
	}

	/* passphrases require pbkdf2 salt and iters */
	if (dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_PASSPHRASE) {
		if (dcp->cp_wkey->wk_salt == 0 ||
		    dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) {
			ret = SET_ERROR(EINVAL);
			goto error;
		}
	} else {
		/* all other key formats must not carry pbkdf2 parameters */
		if (dcp->cp_wkey->wk_salt != 0 || dcp->cp_wkey->wk_iters != 0) {
			ret = SET_ERROR(EINVAL);
			goto error;
		}
	}

	/* make sure the dd's wkey is loaded */
	ret = dmu_objset_check_wkey_loaded(dd);
	if (ret != 0)
		goto error;

	dsl_dir_rele(dd, FTAG);

	return (0);

error:
	if (dd != NULL)
		dsl_dir_rele(dd, FTAG);

	return (ret);
}

/*
 * This function deals with the intricacies of updating wrapping
 * key references and encryption roots recursively in the event
 * of a call to 'zfs change-key' or 'zfs promote'. The 'skip'
 * parameter should always be set to B_FALSE when called
 * externally.
 */
static void
spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj,
    uint64_t new_rddobj, dsl_wrapping_key_t *wkey, boolean_t skip,
    dmu_tx_t *tx)
{
	int ret;
	zap_cursor_t *zc;
	zap_attribute_t *za;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *dd = NULL;
	dsl_crypto_key_t *dck = NULL;
	uint64_t curr_rddobj;

	/* the caller must already hold sk_wkeys_lock as writer */
	ASSERT(RW_WRITE_HELD(&dp->dp_spa->spa_keystore.sk_wkeys_lock));

	/* hold the dd */
	VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));

	/* ignore special dsl dirs */
	if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') {
		dsl_dir_rele(dd, FTAG);
		return;
	}

	ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj);
	VERIFY(ret == 0 || ret == ENOENT);

	/*
	 * Stop recursing if this dsl dir didn't inherit from the root
	 * or if this dd is a clone.
*/
	if (ret == ENOENT ||
	    (!skip && (curr_rddobj != rddobj || dsl_dir_is_clone(dd)))) {
		dsl_dir_rele(dd, FTAG);
		return;
	}

	/*
	 * If we don't have a wrapping key just update the dck to reflect the
	 * new encryption root. Otherwise rewrap the entire dck and re-sync it
	 * to disk. If skip is set, we don't do any of this work.
	 */
	if (!skip) {
		if (wkey == NULL) {
			VERIFY0(zap_update(dp->dp_meta_objset,
			    dd->dd_crypto_obj,
			    DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1,
			    &new_rddobj, tx));
		} else {
			VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd,
			    FTAG, &dck));
			/* hold the new wkey before dropping the old one */
			dsl_wrapping_key_hold(wkey, dck);
			dsl_wrapping_key_rele(dck->dck_wkey, dck);
			dck->dck_wkey = wkey;
			dsl_crypto_key_sync(dck, tx);
			spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG);
		}
	}

	/*
	 * NOTE(review): the cursor and attribute are heap allocated,
	 * presumably to keep stack usage low across the recursion below.
	 */
	zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
	za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	/* Recurse into all child dsl dirs. */
	for (zap_cursor_init(zc, dp->dp_meta_objset,
	    dsl_dir_phys(dd)->dd_child_dir_zapobj);
	    zap_cursor_retrieve(zc, za) == 0;
	    zap_cursor_advance(zc)) {
		spa_keystore_change_key_sync_impl(rddobj,
		    za->za_first_integer, new_rddobj, wkey, B_FALSE, tx);
	}
	zap_cursor_fini(zc);

	/*
	 * Recurse into all dsl dirs of clones. We utilize the skip parameter
	 * here so that we don't attempt to process the clones directly. This
	 * is because the clone and its origin share the same dck, which has
	 * already been updated.
	 */
	for (zap_cursor_init(zc, dp->dp_meta_objset,
	    dsl_dir_phys(dd)->dd_clones);
	    zap_cursor_retrieve(zc, za) == 0;
	    zap_cursor_advance(zc)) {
		dsl_dataset_t *clone;

		VERIFY0(dsl_dataset_hold_obj(dp, za->za_first_integer,
		    FTAG, &clone));
		spa_keystore_change_key_sync_impl(rddobj,
		    clone->ds_dir->dd_object, new_rddobj, wkey, B_TRUE, tx);
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(zc);

	kmem_free(za, sizeof (zap_attribute_t));
	kmem_free(zc, sizeof (zap_cursor_t));

	dsl_dir_rele(dd, FTAG);
}

/*
 * Sync callback of the change-key sync task. 'arg' is a
 * spa_keystore_change_key_args_t. Determines the old and new encryption root
 * for the named dataset, updates its keylocation property, and then rewrites
 * the affected DSL Crypto Keys through spa_keystore_change_key_sync_impl()
 * while holding sk_wkeys_lock as writer.
 */
static void
spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_t *ds;
	avl_index_t where;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	spa_t *spa = dp->dp_spa;
	spa_keystore_change_key_args_t *skcka = arg;
	dsl_crypto_params_t *dcp = skcka->skcka_cp;
	dsl_wrapping_key_t *wkey = NULL, *found_wkey;
	dsl_wrapping_key_t wkey_search;
	char *keylocation = dcp->cp_keylocation;
	uint64_t rddobj, new_rddobj;

	/* create and initialize the wrapping key */
	VERIFY0(dsl_dataset_hold(dp, skcka->skcka_dsname, FTAG, &ds));
	ASSERT(!ds->ds_is_snapshot);

	if (dcp->cp_cmd == DCP_CMD_NEW_KEY ||
	    dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) {
		/*
		 * We are changing to a new wkey. Set additional properties
		 * which can be sent along with this ioctl. Note that this
		 * command can set keylocation even if it can't normally be
		 * set via 'zfs set' due to a non-local keylocation.
		 */
		if (dcp->cp_cmd == DCP_CMD_NEW_KEY) {
			wkey = dcp->cp_wkey;
			wkey->wk_ddobj = ds->ds_dir->dd_object;
		} else {
			/* forced new key: no wkey is supplied */
			keylocation = "prompt";
		}

		if (keylocation != NULL) {
			dsl_prop_set_sync_impl(ds,
			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
			    ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1,
			    keylocation, tx);
		}

		VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir,
		    &rddobj));
		new_rddobj = ds->ds_dir->dd_object;
	} else {
		/*
		 * We are inheritting the parent's wkey. Unset any local
		 * keylocation and grab a reference to the wkey.
*/
		if (dcp->cp_cmd == DCP_CMD_INHERIT) {
			VERIFY0(spa_keystore_wkey_hold_dd(spa,
			    ds->ds_dir->dd_parent, FTAG, &wkey));
		}

		dsl_prop_set_sync_impl(ds,
		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_NONE,
		    0, 0, NULL, tx);

		rddobj = ds->ds_dir->dd_object;
		VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir->dd_parent,
		    &new_rddobj));
	}

	/* only the forced commands run without a wrapping key */
	if (wkey == NULL) {
		ASSERT(dcp->cp_cmd == DCP_CMD_FORCE_INHERIT ||
		    dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY);
	}

	rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER);

	/* recurse through all children and rewrap their keys */
	spa_keystore_change_key_sync_impl(rddobj, ds->ds_dir->dd_object,
	    new_rddobj, wkey, B_FALSE, tx);

	/*
	 * All references to the old wkey should be released now (if it
	 * existed). Replace the wrapping key.
	 */
	wkey_search.wk_ddobj = ds->ds_dir->dd_object;
	found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &wkey_search, NULL);
	if (found_wkey != NULL) {
		ASSERT0(zfs_refcount_count(&found_wkey->wk_refcnt));
		avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey);
		dsl_wrapping_key_free(found_wkey);
	}

	if (dcp->cp_cmd == DCP_CMD_NEW_KEY) {
		/* avl_find() is called only to obtain the insertion point */
		avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where);
		avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where);
	} else if (wkey != NULL) {
		/* drop the hold taken on the parent's wkey above */
		dsl_wrapping_key_rele(wkey, FTAG);
	}

	rw_exit(&spa->spa_keystore.sk_wkeys_lock);

	dsl_dataset_rele(ds, FTAG);
}

/*
 * Change the wrapping key or encryption root of dataset 'dsname' as
 * described by 'dcp'. The check and sync callbacks are run as a sync task;
 * the sync task's result is returned.
 */
int
spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp)
{
	spa_keystore_change_key_args_t skcka;

	/* initialize the args struct */
	skcka.skcka_dsname = dsname;
	skcka.skcka_cp = dcp;

	/*
	 * Perform the actual work in syncing context. The blocks modified
	 * here could be calculated but it would require holding the pool
	 * lock and traversing all of the datasets that will have their keys
	 * changed.
*/ return (dsl_sync_task(dsname, spa_keystore_change_key_check, spa_keystore_change_key_sync, &skcka, 15, ZFS_SPACE_CHECK_RESERVED)); } int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent) { int ret; uint64_t curr_rddobj, parent_rddobj; if (dd->dd_crypto_obj == 0) return (0); ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); if (ret != 0) goto error; /* * if this is not an encryption root, we must make sure we are not * moving dd to a new encryption root */ if (dd->dd_object != curr_rddobj) { ret = dsl_dir_get_encryption_root_ddobj(newparent, &parent_rddobj); if (ret != 0) goto error; if (parent_rddobj != curr_rddobj) { ret = SET_ERROR(EACCES); goto error; } } return (0); error: return (ret); } /* * Check to make sure that a promote from targetdd to origindd will not require * any key rewraps. */ int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin) { int ret; uint64_t rddobj, op_rddobj, tp_rddobj; /* If the dataset is not encrypted we don't need to check anything */ if (origin->dd_crypto_obj == 0) return (0); /* * If we are not changing the first origin snapshot in a chain * the encryption root won't change either. */ if (dsl_dir_is_clone(origin)) return (0); /* * If the origin is the encryption root we will update * the DSL Crypto Key to point to the target instead. */ ret = dsl_dir_get_encryption_root_ddobj(origin, &rddobj); if (ret != 0) return (ret); if (rddobj == origin->dd_object) return (0); /* * The origin is inheriting its encryption root from its parent. * Check that the parent of the target has the same encryption root. 
*/
	/* a parent without an encryption root (ENOENT) is reported as EACCES */
	ret = dsl_dir_get_encryption_root_ddobj(origin->dd_parent, &op_rddobj);
	if (ret == ENOENT)
		return (SET_ERROR(EACCES));
	else if (ret != 0)
		return (ret);

	ret = dsl_dir_get_encryption_root_ddobj(target->dd_parent, &tp_rddobj);
	if (ret == ENOENT)
		return (SET_ERROR(EACCES));
	else if (ret != 0)
		return (ret);

	if (op_rddobj != tp_rddobj)
		return (SET_ERROR(EACCES));

	return (0);
}

/*
 * Sync-side counterpart of dsl_dataset_promote_crypt_check(). Does nothing
 * unless 'origin' is encrypted, is not itself a clone, and is its own
 * encryption root.
 */
void
dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
    dmu_tx_t *tx)
{
	uint64_t rddobj;
	dsl_pool_t *dp = target->dd_pool;
	dsl_dataset_t *targetds;
	dsl_dataset_t *originds;
	char *keylocation;

	if (origin->dd_crypto_obj == 0)
		return;
	if (dsl_dir_is_clone(origin))
		return;

	VERIFY0(dsl_dir_get_encryption_root_ddobj(origin, &rddobj));

	if (rddobj != origin->dd_object)
		return;

	/*
	 * If the target is being promoted to the encryption root update the
	 * DSL Crypto Key and keylocation to reflect that. We also need to
	 * update the DSL Crypto Keys of all children inheritting their
	 * encryption root to point to the new target. Otherwise, the check
	 * function ensured that the encryption root will not change.
*/
	keylocation = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);

	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dir_phys(target)->dd_head_dataset_obj, FTAG, &targetds));
	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dir_phys(origin)->dd_head_dataset_obj, FTAG, &originds));

	/* move the origin's keylocation over to the target */
	VERIFY0(dsl_prop_get_dd(origin, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
	    1, ZAP_MAXVALUELEN, keylocation, NULL, B_FALSE));
	dsl_prop_set_sync_impl(targetds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
	    ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, keylocation, tx);
	dsl_prop_set_sync_impl(originds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
	    ZPROP_SRC_NONE, 0, 0, NULL, tx);

	/* no wrapping key is passed, so only the root ddobj is rewritten */
	rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
	spa_keystore_change_key_sync_impl(rddobj, origin->dd_object,
	    target->dd_object, NULL, B_FALSE, tx);
	rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock);

	dsl_dataset_rele(targetds, FTAG);
	dsl_dataset_rele(originds, FTAG);
	kmem_free(keylocation, ZAP_MAXVALUELEN);
}

/*
 * Validate the crypto parameters 'dcp' for a dataset about to be created
 * under 'parentdd' (which may be NULL). A NULL 'dcp' is treated as an
 * all-zero parameter set. If 'will_encrypt' is not NULL it is set to B_TRUE
 * once the effective encryption suite is known not to be ZIO_CRYPT_OFF, and
 * to B_FALSE otherwise. Returns 0 if the parameters are acceptable or an
 * errno.
 */
int
dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp,
    boolean_t *will_encrypt)
{
	int ret;
	uint64_t pcrypt, crypt;
	dsl_crypto_params_t dummy_dcp = { 0 };

	if (will_encrypt != NULL)
		*will_encrypt = B_FALSE;

	if (dcp == NULL)
		dcp = &dummy_dcp;

	if (dcp->cp_cmd != DCP_CMD_NONE)
		return (SET_ERROR(EINVAL));

	if (parentdd != NULL) {
		ret = dsl_dir_get_crypt(parentdd, &pcrypt);
		if (ret != 0)
			return (ret);
	} else {
		pcrypt = ZIO_CRYPT_OFF;
	}

	/* the effective suite: the parent's when inheriting, else our own */
	crypt = (dcp->cp_crypt == ZIO_CRYPT_INHERIT) ? pcrypt : dcp->cp_crypt;

	ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT);
	ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT);

	/* check for valid dcp with no encryption (inherited or local) */
	if (crypt == ZIO_CRYPT_OFF) {
		/* Must not specify encryption params */
		if (dcp->cp_wkey != NULL ||
		    (dcp->cp_keylocation != NULL &&
		    strcmp(dcp->cp_keylocation, "none") != 0))
			return (SET_ERROR(EINVAL));

		return (0);
	}

	if (will_encrypt != NULL)
		*will_encrypt = B_TRUE;

	/*
	 * We will now definitely be encrypting. Check the feature flag.
When
	 * creating the pool the caller will check this for us since we won't
	 * technically have the feature activated yet.
	 */
	if (parentdd != NULL &&
	    !spa_feature_is_enabled(parentdd->dd_pool->dp_spa,
	    SPA_FEATURE_ENCRYPTION)) {
		return (SET_ERROR(EOPNOTSUPP));
	}

	/* Check for errata #4 (encryption enabled, bookmark_v2 disabled) */
	if (parentdd != NULL &&
	    !spa_feature_is_enabled(parentdd->dd_pool->dp_spa,
	    SPA_FEATURE_BOOKMARK_V2)) {
		return (SET_ERROR(EOPNOTSUPP));
	}

	/* handle inheritance */
	if (dcp->cp_wkey == NULL) {
		ASSERT3P(parentdd, !=, NULL);

		/* key must be fully unspecified */
		if (dcp->cp_keylocation != NULL)
			return (SET_ERROR(EINVAL));

		/* parent must have a key to inherit */
		if (pcrypt == ZIO_CRYPT_OFF)
			return (SET_ERROR(EINVAL));

		/* check for parent key */
		ret = dmu_objset_check_wkey_loaded(parentdd);
		if (ret != 0)
			return (ret);

		return (0);
	}

	/* At this point we should have a fully specified key. Check location */
	if (dcp->cp_keylocation == NULL ||
	    !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE))
		return (SET_ERROR(EINVAL));

	/* Must have fully specified keyformat */
	switch (dcp->cp_wkey->wk_keyformat) {
	case ZFS_KEYFORMAT_HEX:
	case ZFS_KEYFORMAT_RAW:
		/* requires no pbkdf2 iters and salt */
		if (dcp->cp_wkey->wk_salt != 0 ||
		    dcp->cp_wkey->wk_iters != 0)
			return (SET_ERROR(EINVAL));
		break;
	case ZFS_KEYFORMAT_PASSPHRASE:
		/* requires pbkdf2 iters and salt */
		if (dcp->cp_wkey->wk_salt == 0 ||
		    dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS)
			return (SET_ERROR(EINVAL));
		break;
	case ZFS_KEYFORMAT_NONE:
	default:
		/* keyformat must be specified and valid */
		return (SET_ERROR(EINVAL));
	}

	return (0);
}

/*
 * Set up the on-disk crypto state of dsl dir 'dd' for a newly created
 * dataset 'dsobj'. Clones copy the crypto key of 'origin' when the origin is
 * encrypted. For other datasets a DSL Crypto Key is created from 'dcp' (or
 * from the parent's wrapping key when dcp carries none) and the encryption
 * feature is activated on the dataset. Nothing is done when 'dcp' is NULL,
 * is a raw receive, or resolves to no encryption.
 */
void
dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
    dsl_dataset_t *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	uint64_t crypt;
	dsl_wrapping_key_t *wkey;

	/* clones always use their origin's wrapping key */
	if (dsl_dir_is_clone(dd)) {
		ASSERT3P(dcp, ==, NULL);

		/*
		 * If this is an encrypted clone we just need to clone the
		 * dck
into dd. Zapify the dd so we can do that.
		 */
		if (origin->ds_dir->dd_crypto_obj != 0) {
			dmu_buf_will_dirty(dd->dd_dbuf, tx);
			dsl_dir_zapify(dd, tx);

			dd->dd_crypto_obj =
			    dsl_crypto_key_clone_sync(origin->ds_dir, tx);
			VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object,
			    DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1,
			    &dd->dd_crypto_obj, tx));
		}

		return;
	}

	/*
	 * A NULL dcp at this point indicates this is the origin dataset
	 * which does not have an objset to encrypt. Raw receives will handle
	 * encryption separately later. In both cases we can simply return.
	 */
	if (dcp == NULL || dcp->cp_cmd == DCP_CMD_RAW_RECV)
		return;

	crypt = dcp->cp_crypt;
	wkey = dcp->cp_wkey;

	/* figure out the effective crypt */
	if (crypt == ZIO_CRYPT_INHERIT && dd->dd_parent != NULL)
		VERIFY0(dsl_dir_get_crypt(dd->dd_parent, &crypt));

	/* if we aren't doing encryption just return */
	if (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT)
		return;

	/* zapify the dd so that we can add the crypto key obj to it */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dsl_dir_zapify(dd, tx);

	/* use the new key if given or inherit from the parent */
	if (wkey == NULL) {
		VERIFY0(spa_keystore_wkey_hold_dd(dp->dp_spa,
		    dd->dd_parent, FTAG, &wkey));
	} else {
		wkey->wk_ddobj = dd->dd_object;
	}

	ASSERT3P(wkey, !=, NULL);

	/* Create or clone the DSL crypto key and activate the feature */
	dd->dd_crypto_obj = dsl_crypto_key_create_sync(crypt, wkey, tx);
	VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object,
	    DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj,
	    tx));
	dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION,
	    (void *)B_TRUE, tx);

	/*
	 * If we inherited the wrapping key we release our reference now.
	 * Otherwise, this is a new key and we need to load it into the
	 * keystore.
	 */
	if (dcp->cp_wkey == NULL) {
		dsl_wrapping_key_rele(wkey, FTAG);
	} else {
		VERIFY0(spa_keystore_load_wkey_impl(dp->dp_spa, wkey));
	}
}

/*
 * Argument bundle for the raw-receive sync task: the dataset being received
 * into, the optional "from" dataset, the objset type, the nvlist carrying
 * the received parameters, and whether the key itself should be synced.
 */
typedef struct dsl_crypto_recv_key_arg {
	uint64_t dcrka_dsobj;
	uint64_t dcrka_fromobj;
	dmu_objset_type_t dcrka_ostype;
	nvlist_t *dcrka_nvl;
	boolean_t dcrka_do_key;
} dsl_crypto_recv_key_arg_t;

/*
 * Validate the objset-level parameters of a raw receive found in 'nvl'
 * against the limits of this pool and, if the objset already exists, against
 * its meta dnode. Returns 0 if the parameters are acceptable or an errno.
 */
static int
dsl_crypto_recv_raw_objset_check(dsl_dataset_t *ds, dsl_dataset_t *fromds,
    dmu_objset_type_t ostype, nvlist_t *nvl, dmu_tx_t *tx)
{
	int ret;
	objset_t *os;
	dnode_t *mdn;
	uint8_t *buf = NULL;
	uint_t len;
	uint64_t intval, nlevels, blksz, ibs;
	uint64_t nblkptr, maxblkid;

	if (ostype != DMU_OST_ZFS && ostype != DMU_OST_ZVOL)
		return (SET_ERROR(EINVAL));

	/* raw receives also need info about the structure of the metadnode */
	ret = nvlist_lookup_uint64(nvl, "mdn_compress", &intval);
	if (ret != 0 || intval >= ZIO_COMPRESS_LEGACY_FUNCTIONS)
		return (SET_ERROR(EINVAL));

	ret = nvlist_lookup_uint64(nvl, "mdn_checksum", &intval);
	if (ret != 0 || intval >= ZIO_CHECKSUM_LEGACY_FUNCTIONS)
		return (SET_ERROR(EINVAL));

	ret = nvlist_lookup_uint64(nvl, "mdn_nlevels", &nlevels);
	if (ret != 0 || nlevels > DN_MAX_LEVELS)
		return (SET_ERROR(EINVAL));

	ret = nvlist_lookup_uint64(nvl, "mdn_blksz", &blksz);
	if (ret != 0 || blksz < SPA_MINBLOCKSIZE)
		return (SET_ERROR(EINVAL));
	else if (blksz > spa_maxblocksize(tx->tx_pool->dp_spa))
		return (SET_ERROR(ENOTSUP));

	ret = nvlist_lookup_uint64(nvl, "mdn_indblkshift", &ibs);
	if (ret != 0 || ibs < DN_MIN_INDBLKSHIFT || ibs > DN_MAX_INDBLKSHIFT)
		return (SET_ERROR(ENOTSUP));

	ret = nvlist_lookup_uint64(nvl, "mdn_nblkptr", &nblkptr);
	if (ret != 0 || nblkptr != DN_MAX_NBLKPTR)
		return (SET_ERROR(ENOTSUP));

	ret = nvlist_lookup_uint64(nvl, "mdn_maxblkid", &maxblkid);
	if (ret != 0)
		return (SET_ERROR(EINVAL));

	/* only the length of the portable MAC is validated here */
	ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len);
	if (ret != 0 || len != ZIO_OBJSET_MAC_LEN)
		return (SET_ERROR(EINVAL));

	ret = dmu_objset_from_ds(ds, &os);
	if (ret != 0)
		return (ret);

	/*
	 * NOTE(review): the lines prefixed with '-' below are removed-line
	 * markers of the enclosing patch; they are kept verbatim.
	 */
-	/*
-	 * Useraccounting is
not portable and must be done with the keys loaded.
-	 * Therefore, whenever we do any kind of receive the useraccounting
-	 * must not be present.
-	 */
-	ASSERT0(os->os_flags & OBJSET_FLAG_USERACCOUNTING_COMPLETE);
-	ASSERT0(os->os_flags & OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE);
-
	/*
	 * NOTE(review): the lines prefixed with '-' above are removed-line
	 * markers of the enclosing patch; they are kept verbatim.
	 */
	mdn = DMU_META_DNODE(os);

	/*
	 * If we already created the objset, make sure its unchangeable
	 * properties match the ones received in the nvlist.
	 */
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	if (!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) &&
	    (mdn->dn_nlevels != nlevels || mdn->dn_datablksz != blksz ||
	    mdn->dn_indblkshift != ibs || mdn->dn_nblkptr != nblkptr)) {
		rrw_exit(&ds->ds_bp_rwlock, FTAG);
		return (SET_ERROR(EINVAL));
	}
	rrw_exit(&ds->ds_bp_rwlock, FTAG);

	/*
	 * Check that the ivset guid of the fromds matches the one from the
	 * send stream. Older versions of the encryption code did not have
	 * an ivset guid on the from dataset and did not send one in the
	 * stream. For these streams we provide the
	 * zfs_disable_ivset_guid_check tunable to allow these datasets to
	 * be received with a generated ivset guid.
	 */
	if (fromds != NULL && !zfs_disable_ivset_guid_check) {
		uint64_t from_ivset_guid = 0;
		intval = 0;

		/*
		 * Lookup failures are deliberately ignored: both values
		 * were zeroed above and a zero is rejected below.
		 */
		(void) nvlist_lookup_uint64(nvl, "from_ivset_guid", &intval);
		(void) zap_lookup(tx->tx_pool->dp_meta_objset,
		    fromds->ds_object, DS_FIELD_IVSET_GUID,
		    sizeof (from_ivset_guid), 1, &from_ivset_guid);

		if (intval == 0 || from_ivset_guid == 0)
			return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISSING));

		if (intval != from_ivset_guid)
			return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISMATCH));
	}

	return (0);
}

/*
 * Apply the objset-level parameters of a raw receive from 'nvl' to 'ds':
 * create the objset if the dataset does not have one yet, install the
 * portable MAC, and set up the meta dnode.
 */
static void
dsl_crypto_recv_raw_objset_sync(dsl_dataset_t *ds, dmu_objset_type_t ostype,
    nvlist_t *nvl, dmu_tx_t *tx)
{
	dsl_pool_t *dp = tx->tx_pool;
	objset_t *os;
	dnode_t *mdn;
	zio_t *zio;
	uint8_t *portable_mac;
	uint_t len;
	uint64_t compress, checksum, nlevels, blksz, ibs, maxblkid;
	boolean_t newds = B_FALSE;

	VERIFY0(dmu_objset_from_ds(ds, &os));
	mdn = DMU_META_DNODE(os);

	/*
	 * Fetch the values we need from the nvlist. "to_ivset_guid" must
	 * be set on the snapshot, which doesn't exist yet. The receive
	 * code will take care of this for us later.
	 */
	compress = fnvlist_lookup_uint64(nvl, "mdn_compress");
	checksum = fnvlist_lookup_uint64(nvl, "mdn_checksum");
	nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels");
	blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz");
	ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift");
	maxblkid = fnvlist_lookup_uint64(nvl, "mdn_maxblkid");
	VERIFY0(nvlist_lookup_uint8_array(nvl, "portable_mac", &portable_mac,
	    &len));

	/* if we haven't created an objset for the ds yet, do that now */
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	if (BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
		(void) dmu_objset_create_impl_dnstats(dp->dp_spa, ds,
		    dsl_dataset_get_blkptr(ds), ostype, nlevels, blksz,
		    ibs, tx);
		newds = B_TRUE;
	}
	rrw_exit(&ds->ds_bp_rwlock, FTAG);

	/*
	 * Set the portable MAC. The local MAC will always be zero since the
	 * incoming data will all be portable and user accounting will be
	 * deferred until the next mount. Afterwards, flag the os to be
	 * written out raw next time.
*/
	arc_release(os->os_phys_buf, &os->os_phys_buf);
	bcopy(portable_mac, os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN);
	bzero(os->os_phys->os_local_mac, ZIO_OBJSET_MAC_LEN);
	/*
	 * NOTE(review): the '+' below marks a line added by the enclosing
	 * patch; it is kept verbatim.
	 */
+	os->os_flags &= ~OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;

	/* set metadnode compression and checksum */
	mdn->dn_compress = compress;
	mdn->dn_checksum = checksum;

	rw_enter(&mdn->dn_struct_rwlock, RW_WRITER);
	dnode_new_blkid(mdn, maxblkid, tx, B_FALSE, B_TRUE);
	rw_exit(&mdn->dn_struct_rwlock);

	/*
	 * We can't normally dirty the dataset in syncing context unless
	 * we are creating a new dataset. In this case, we perform a
	 * pseudo txg sync here instead.
	 */
	if (newds) {
		dsl_dataset_dirty(ds, tx);
	} else {
		zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
		dsl_dataset_sync(ds, zio, tx);
		VERIFY0(zio_wait(zio));

		/* dsl_dataset_sync_done will drop this reference. */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
		dsl_dataset_sync_done(ds, tx);
	}
}

/*
 * Validate the DSL Crypto Key and wrapping key parameters of a raw receive
 * found in 'nvl'. 'ds' must be flagged inconsistent (asserted). Returns 0 if
 * all fields are present and acceptable, EACCES if the key guid differs from
 * the one already stored for the dataset, ENOTSUP for an unsupported key
 * version, or EINVAL for any other malformed field.
 */
int
dsl_crypto_recv_raw_key_check(dsl_dataset_t *ds, nvlist_t *nvl, dmu_tx_t *tx)
{
	int ret;
	objset_t *mos = tx->tx_pool->dp_meta_objset;
	uint8_t *buf = NULL;
	uint_t len;
	uint64_t intval, key_guid, version;
	boolean_t is_passphrase = B_FALSE;

	ASSERT(dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT);

	/*
	 * Read and check all the encryption values from the nvlist. We need
	 * all of the fields of a DSL Crypto Key, as well as a fully specified
	 * wrapping key.
	 */
	ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, &intval);
	if (ret != 0 || intval >= ZIO_CRYPT_FUNCTIONS ||
	    intval <= ZIO_CRYPT_OFF)
		return (SET_ERROR(EINVAL));

	ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID, &intval);
	if (ret != 0)
		return (SET_ERROR(EINVAL));

	/*
	 * If this is an incremental receive make sure the given key guid
	 * matches the one we already have.
	 */
	if (ds->ds_dir->dd_crypto_obj != 0) {
		ret = zap_lookup(mos, ds->ds_dir->dd_crypto_obj,
		    DSL_CRYPTO_KEY_GUID, 8, 1, &key_guid);
		if (ret != 0)
			return (ret);
		if (intval != key_guid)
			return (SET_ERROR(EACCES));
	}

	/* the byte arrays are only checked for presence and exact length */
	ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY,
	    &buf, &len);
	if (ret != 0 || len != MASTER_KEY_MAX_LEN)
		return (SET_ERROR(EINVAL));

	ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY,
	    &buf, &len);
	if (ret != 0 || len != SHA512_HMAC_KEYLEN)
		return (SET_ERROR(EINVAL));

	ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &buf, &len);
	if (ret != 0 || len != WRAPPING_IV_LEN)
		return (SET_ERROR(EINVAL));

	ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &buf, &len);
	if (ret != 0 || len != WRAPPING_MAC_LEN)
		return (SET_ERROR(EINVAL));

	/*
	 * We don't support receiving old on-disk formats. The version 0
	 * implementation protected several fields in an objset that were
	 * not always portable during a raw receive. As a result, we call
	 * the old version an on-disk errata #3.
	 */
	ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_VERSION, &version);
	if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION)
		return (SET_ERROR(ENOTSUP));

	ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT),
	    &intval);
	if (ret != 0 || intval >= ZFS_KEYFORMAT_FORMATS ||
	    intval == ZFS_KEYFORMAT_NONE)
		return (SET_ERROR(EINVAL));

	is_passphrase = (intval == ZFS_KEYFORMAT_PASSPHRASE);

	/*
	 * for raw receives we allow any number of pbkdf2iters since there
	 * won't be a chance for the user to change it.
	 */
	/* iters must be non-zero exactly when the key is a passphrase */
	ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS),
	    &intval);
	if (ret != 0 || (is_passphrase == (intval == 0)))
		return (SET_ERROR(EINVAL));

	/* the same rule applies to the salt */
	ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT),
	    &intval);
	if (ret != 0 || (is_passphrase == (intval == 0)))
		return (SET_ERROR(EINVAL));

	return (0);
}

/*
 * Write the DSL Crypto Key carried in 'nvl' to disk for dataset 'ds'. All
 * nvlist lookups are fatal on failure (fnvlist / VERIFY0). For a dsl dir
 * that has no crypto object yet, one is created first.
 */
void
dsl_crypto_recv_raw_key_sync(dsl_dataset_t *ds, nvlist_t *nvl, dmu_tx_t *tx)
{
	dsl_pool_t *dp = tx->tx_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dir_t *dd = ds->ds_dir;
	uint_t len;
	uint64_t rddobj, one = 1;
	uint8_t *keydata, *hmac_keydata, *iv, *mac;
	uint64_t crypt, key_guid, keyformat, iters, salt;
	uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION;
	char *keylocation = "prompt";

	/* lookup the values we need to create the DSL Crypto Key */
	crypt = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE);
	key_guid = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID);
	keyformat = fnvlist_lookup_uint64(nvl,
	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT));
	iters = fnvlist_lookup_uint64(nvl,
	    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS));
	salt = fnvlist_lookup_uint64(nvl,
	    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT));
	VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY,
	    &keydata, &len));
	VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY,
	    &hmac_keydata, &len));
	VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &iv, &len));
	VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &mac, &len));

	/* if this is a new dataset setup the DSL Crypto Key.
*/
	if (dd->dd_crypto_obj == 0) {
		/* zapify the dsl dir so we can add the key object to it */
		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dsl_dir_zapify(dd, tx);

		/* create the DSL Crypto Key on disk and activate the feature */
		dd->dd_crypto_obj = zap_create(mos,
		    DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
		/* a freshly created key object starts with a refcount of one */
		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
		    dd->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT,
		    sizeof (uint64_t), 1, &one, tx));
		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
		    dd->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION,
		    sizeof (uint64_t), 1, &version, tx));

		dsl_dataset_activate_feature(ds->ds_object,
		    SPA_FEATURE_ENCRYPTION, (void *)B_TRUE, tx);
		ds->ds_feature[SPA_FEATURE_ENCRYPTION] = (void *)B_TRUE;

		/* save the dd_crypto_obj on disk */
		VERIFY0(zap_add(mos, dd->dd_object, DD_FIELD_CRYPTO_KEY_OBJ,
		    sizeof (uint64_t), 1, &dd->dd_crypto_obj, tx));

		/*
		 * Set the keylocation to prompt by default. If keylocation
		 * has been provided via the properties, this will be overridden
		 * later.
		 */
		dsl_prop_set_sync_impl(ds,
		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
		    ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1,
		    keylocation, tx);

		/* a newly set up dsl dir becomes its own encryption root */
		rddobj = dd->dd_object;
	} else {
		VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &rddobj));
	}

	/* sync the key data to the ZAP object on disk */
	dsl_crypto_key_sync_impl(mos, dd->dd_crypto_obj, crypt,
	    rddobj, key_guid, iv, mac, keydata, hmac_keydata, keyformat, salt,
	    iters, tx);
}

/*
 * Check callback of the raw-receive sync task. 'arg' is a
 * dsl_crypto_recv_key_arg_t. Holds the target dataset (and the "from"
 * dataset when dcrka_fromobj is non-zero), runs the objset and key checks,
 * and releases both holds before returning the first error or 0.
 */
static int
dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx)
{
	int ret;
	dsl_crypto_recv_key_arg_t *dcrka = arg;
	dsl_dataset_t *ds = NULL, *fromds = NULL;

	ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj,
	    FTAG, &ds);
	if (ret != 0)
		goto out;

	if (dcrka->dcrka_fromobj != 0) {
		ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_fromobj,
		    FTAG, &fromds);
		if (ret != 0)
			goto out;
	}

	ret = dsl_crypto_recv_raw_objset_check(ds, fromds,
	    dcrka->dcrka_ostype, dcrka->dcrka_nvl, tx);
	if (ret != 0)
		goto out;

	/*
	 * We run this check even if we won't be doing this part of
	 * the receive now so that we don't
make the user wait until * the receive finishes to fail. */ ret = dsl_crypto_recv_raw_key_check(ds, dcrka->dcrka_nvl, tx); if (ret != 0) goto out; out: if (ds != NULL) dsl_dataset_rele(ds, FTAG); if (fromds != NULL) dsl_dataset_rele(fromds, FTAG); return (ret); } static void dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) { dsl_crypto_recv_key_arg_t *dcrka = arg; dsl_dataset_t *ds; VERIFY0(dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds)); dsl_crypto_recv_raw_objset_sync(ds, dcrka->dcrka_ostype, dcrka->dcrka_nvl, tx); if (dcrka->dcrka_do_key) dsl_crypto_recv_raw_key_sync(ds, dcrka->dcrka_nvl, tx); dsl_dataset_rele(ds, FTAG); } /* * This function is used to sync an nvlist representing a DSL Crypto Key and * the associated encryption parameters. The key will be written exactly as is * without wrapping it. */ int dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj, dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key) { dsl_crypto_recv_key_arg_t dcrka; dcrka.dcrka_dsobj = dsobj; dcrka.dcrka_fromobj = fromobj; dcrka.dcrka_ostype = ostype; dcrka.dcrka_nvl = nvl; dcrka.dcrka_do_key = do_key; return (dsl_sync_task(poolname, dsl_crypto_recv_key_check, dsl_crypto_recv_key_sync, &dcrka, 1, ZFS_SPACE_CHECK_NORMAL)); } int dsl_crypto_populate_key_nvlist(objset_t *os, uint64_t from_ivset_guid, nvlist_t **nvl_out) { int ret; dsl_dataset_t *ds = os->os_dsl_dataset; dnode_t *mdn; uint64_t rddobj; nvlist_t *nvl = NULL; uint64_t dckobj = ds->ds_dir->dd_crypto_obj; dsl_dir_t *rdd = NULL; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t crypt = 0, key_guid = 0, format = 0; uint64_t iters = 0, salt = 0, version = 0; uint64_t to_ivset_guid = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; uint8_t mac[WRAPPING_MAC_LEN]; ASSERT(dckobj != 0); mdn = DMU_META_DNODE(os); nvl = fnvlist_alloc(); /* lookup values from the DSL Crypto Key */ ret = 
zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, &crypt); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &key_guid); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, MASTER_KEY_MAX_LEN, raw_keydata); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, SHA512_HMAC_KEYLEN, raw_hmac_keydata); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, iv); if (ret != 0) goto error; ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, mac); if (ret != 0) goto error; /* see zfs_disable_ivset_guid_check tunable for errata info */ ret = zap_lookup(mos, ds->ds_object, DS_FIELD_IVSET_GUID, 8, 1, &to_ivset_guid); if (ret != 0) ASSERT3U(dp->dp_spa->spa_errata, !=, 0); /* * We don't support raw sends of legacy on-disk formats. See the * comment in dsl_crypto_recv_key_check() for details. */ ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; ret = SET_ERROR(ENOTSUP); goto error; } /* * Lookup wrapping key properties. An early version of the code did * not correctly add these values to the wrapping key or the DSL * Crypto Key on disk for non encryption roots, so to be safe we * always take the slightly circuitous route of looking it up from * the encryption root's key. 
*/ ret = dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj); if (ret != 0) goto error; dsl_pool_config_enter(dp, FTAG); ret = dsl_dir_hold_obj(dp, rddobj, NULL, FTAG, &rdd); if (ret != 0) goto error_unlock; ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &format); if (ret != 0) goto error_unlock; if (format == ZFS_KEYFORMAT_PASSPHRASE) { ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters); if (ret != 0) goto error_unlock; ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt); if (ret != 0) goto error_unlock; } dsl_dir_rele(rdd, FTAG); dsl_pool_config_exit(dp, FTAG); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, key_guid); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_VERSION, version); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, raw_keydata, MASTER_KEY_MAX_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, raw_hmac_keydata, SHA512_HMAC_KEYLEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_IV, iv, WRAPPING_IV_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, mac, WRAPPING_MAC_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, "portable_mac", os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN)); fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), format); fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters); fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt); fnvlist_add_uint64(nvl, "mdn_checksum", mdn->dn_checksum); fnvlist_add_uint64(nvl, "mdn_compress", mdn->dn_compress); fnvlist_add_uint64(nvl, "mdn_nlevels", mdn->dn_nlevels); fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz); fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift); fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr); fnvlist_add_uint64(nvl, "mdn_maxblkid", 
mdn->dn_maxblkid); fnvlist_add_uint64(nvl, "to_ivset_guid", to_ivset_guid); fnvlist_add_uint64(nvl, "from_ivset_guid", from_ivset_guid); *nvl_out = nvl; return (0); error_unlock: dsl_pool_config_exit(dp, FTAG); error: if (rdd != NULL) dsl_dir_rele(rdd, FTAG); nvlist_free(nvl); *nvl_out = NULL; return (ret); } uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) { dsl_crypto_key_t dck; uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; uint64_t one = 1ULL; ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(crypt, >, ZIO_CRYPT_OFF); /* create the DSL Crypto Key ZAP object */ dck.dck_obj = zap_create(tx->tx_pool->dp_meta_objset, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); /* fill in the key (on the stack) and sync it to disk */ dck.dck_wkey = wkey; VERIFY0(zio_crypt_key_init(crypt, &dck.dck_key)); dsl_crypto_key_sync(&dck, tx); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, DSL_CRYPTO_KEY_VERSION, sizeof (uint64_t), 1, &version, tx)); zio_crypt_key_destroy(&dck.dck_key); bzero(&dck.dck_key, sizeof (zio_crypt_key_t)); return (dck.dck_obj); } uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx) { objset_t *mos = tx->tx_pool->dp_meta_objset; ASSERT(dmu_tx_is_syncing(tx)); VERIFY0(zap_increment(mos, origindd->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT, 1, tx)); return (origindd->dd_crypto_obj); } void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx) { objset_t *mos = tx->tx_pool->dp_meta_objset; uint64_t refcnt; /* Decrement the refcount, destroy if this is the last reference */ VERIFY0(zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &refcnt)); if (refcnt != 1) { VERIFY0(zap_increment(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, -1, tx)); } else { VERIFY0(zap_destroy(mos, dckobj, tx)); } } void dsl_dataset_crypt_stats(dsl_dataset_t *ds, 
nvlist_t *nv) { uint64_t intval; dsl_dir_t *dd = ds->ds_dir; dsl_dir_t *enc_root; char buf[ZFS_MAX_DATASET_NAME_LEN]; if (dd->dd_crypto_obj == 0) return; intval = dsl_dataset_get_keystatus(dd); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYSTATUS, intval); if (dsl_dir_get_crypt(dd, &intval) == 0) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_ENCRYPTION, intval); if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, DSL_CRYPTO_KEY_GUID, 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEY_GUID, intval); } if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYFORMAT, intval); } if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_SALT, intval); } if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_ITERS, intval); } if (zap_lookup(dd->dd_pool->dp_meta_objset, ds->ds_object, DS_FIELD_IVSET_GUID, 8, 1, &intval) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_IVSET_GUID, intval); } if (dsl_dir_get_encryption_root_ddobj(dd, &intval) == 0) { if (dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, &enc_root) == 0) { dsl_dir_name(enc_root, buf); dsl_dir_rele(enc_root, FTAG); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, buf); } } } int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt) { int ret; dsl_crypto_key_t *dck = NULL; /* look up the key from the spa's keystore */ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); if (ret != 0) goto error; ret = zio_crypt_key_get_salt(&dck->dck_key, salt); if (ret != 0) goto error; spa_keystore_dsl_key_rele(spa, dck, FTAG); return (0); error: if (dck != NULL) spa_keystore_dsl_key_rele(spa, dck, FTAG); return (ret); } /* * Objset blocks are a special 
case for MAC generation. These blocks have 2
 * 256-bit MACs which are embedded within the block itself, rather than a
 * single 128 bit MAC. As a result, this function handles encoding and decoding
 * the MACs on its own, unlike other functions in this file.
 *
 * When 'generate' is set, both computed MACs are stored in the objset_phys_t
 * held in 'abd'. Otherwise they are compared with the MACs already stored
 * there and ECKSUM is returned if either one differs.
 *
 * Returns 0 on success, ECKSUM on a verification mismatch, or the error from
 * the keystore lookup or the HMAC calculation.
 */
int
spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
    abd_t *abd, uint_t datalen, boolean_t byteswap)
{
	int ret;
	dsl_crypto_key_t *dck = NULL;
	void *buf = abd_borrow_buf_copy(abd, datalen);
	objset_phys_t *osp = buf;
	uint8_t portable_mac[ZIO_OBJSET_MAC_LEN];
	uint8_t local_mac[ZIO_OBJSET_MAC_LEN];

	/* look up the key from the spa's keystore */
	ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
	if (ret != 0)
		goto error;

	/* calculate both HMACs */
	ret = zio_crypt_do_objset_hmacs(&dck->dck_key, buf, datalen,
	    byteswap, portable_mac, local_mac);
	if (ret != 0)
		goto error;

	/* the key is not used past this point */
	spa_keystore_dsl_key_rele(spa, dck, FTAG);

	/* if we are generating encode the HMACs in the objset_phys_t */
	if (generate) {
		bcopy(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN);
		bcopy(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN);
		/* only this path returns the buffer with the _copy variant */
		abd_return_buf_copy(abd, buf, datalen);
		return (0);
	}

	if (bcmp(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN) != 0 ||
	    bcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) {
		abd_return_buf(abd, buf, datalen);
		return (SET_ERROR(ECKSUM));
	}

	abd_return_buf(abd, buf, datalen);

	return (0);

error:
	/* dck is still NULL if the keystore lookup itself failed */
	if (dck != NULL)
		spa_keystore_dsl_key_rele(spa, dck, FTAG);
	abd_return_buf(abd, buf, datalen);
	return (ret);
}

/*
 * Compute an HMAC of ZIO_DATA_MAC_LEN bytes over the first 'datalen' bytes
 * of 'abd' using the key of dataset object 'dsobj'.
 *
 * When 'generate' is set the digest is copied into 'mac'. Otherwise the
 * digest is compared with 'mac' and ECKSUM is returned on a mismatch.
 *
 * Returns 0 on success, ECKSUM on a verification mismatch, or the error from
 * the keystore lookup or the HMAC calculation.
 */
int
spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd,
    uint_t datalen, uint8_t *mac)
{
	int ret;
	dsl_crypto_key_t *dck = NULL;
	uint8_t *buf = abd_borrow_buf_copy(abd, datalen);
	uint8_t digestbuf[ZIO_DATA_MAC_LEN];

	/* look up the key from the spa's keystore */
	ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
	if (ret != 0)
		goto error;

	/* perform the hmac */
	ret = zio_crypt_do_hmac(&dck->dck_key, buf, datalen,
	    digestbuf, ZIO_DATA_MAC_LEN);
	if (ret != 0)
		goto error;

	/* the data buffer and the key are both done with once we have the digest */
	abd_return_buf(abd, buf, datalen);
	spa_keystore_dsl_key_rele(spa, dck, FTAG);

	/*
	 * Truncate and fill in mac buffer if we were asked to generate a MAC.
	 * Otherwise verify that the MAC matched what we expected.
	 */
	if (generate) {
		bcopy(digestbuf, mac, ZIO_DATA_MAC_LEN);
		return (0);
	}

	if (bcmp(digestbuf, mac, ZIO_DATA_MAC_LEN) != 0)
		return (SET_ERROR(ECKSUM));

	return (0);

error:
	/* dck is still NULL if the keystore lookup itself failed */
	if (dck != NULL)
		spa_keystore_dsl_key_rele(spa, dck, FTAG);
	abd_return_buf(abd, buf, datalen);
	return (ret);
}

/*
 * This function serves as a multiplexer for encryption and decryption of
 * all blocks (except the L2ARC). For encryption, it will populate the IV,
 * salt, MAC, and cabd (the ciphertext). On decryption it will simply use
 * these fields to populate pabd (the plaintext).
 *
 * The key is looked up by zb->zb_objset; any failure of that lookup is
 * reported as EACCES. If a later step fails while encrypting, the salt, IV
 * and MAC buffers are zeroed before the error is returned.
 */
int
spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb,
    dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt,
    uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd,
    boolean_t *no_crypt)
{
	int ret;
	dsl_crypto_key_t *dck = NULL;
	uint8_t *plainbuf = NULL, *cipherbuf = NULL;

	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION));

	/* look up the key from the spa's keystore */
	ret = spa_keystore_lookup_key(spa, zb->zb_objset, FTAG, &dck);
	if (ret != 0) {
		/* nothing has been borrowed or held yet, so return directly */
		ret = SET_ERROR(EACCES);
		return (ret);
	}

	/*
	 * The input side is borrowed with the _copy variant; the output side
	 * is borrowed without it.
	 */
	if (encrypt) {
		plainbuf = abd_borrow_buf_copy(pabd, datalen);
		cipherbuf = abd_borrow_buf(cabd, datalen);
	} else {
		plainbuf = abd_borrow_buf(pabd, datalen);
		cipherbuf = abd_borrow_buf_copy(cabd, datalen);
	}

	/*
	 * Both encryption and decryption functions need a salt for key
	 * generation and an IV. When encrypting a non-dedup block, we
	 * generate the salt and IV randomly to be stored by the caller. Dedup
	 * blocks perform a (more expensive) HMAC of the plaintext to obtain
	 * the salt and the IV. ZIL blocks have their salt and IV generated
	 * at allocation time in zio_alloc_zil(). On decryption, we simply use
	 * the provided values.
	 */
	if (encrypt && ot != DMU_OT_INTENT_LOG && !dedup) {
		ret = zio_crypt_key_get_salt(&dck->dck_key, salt);
		if (ret != 0)
			goto error;

		ret = zio_crypt_generate_iv(iv);
		if (ret != 0)
			goto error;
	} else if (encrypt && dedup) {
		ret = zio_crypt_generate_iv_salt_dedup(&dck->dck_key,
		    plainbuf, datalen, iv, salt);
		if (ret != 0)
			goto error;
	}

	/* call lower level function to perform encryption / decryption */
	ret = zio_do_crypt_data(encrypt, &dck->dck_key, ot, bswap, salt, iv,
	    mac, datalen, plainbuf, cipherbuf, no_crypt);

	/*
	 * Handle injected decryption faults. Unfortunately, we cannot inject
	 * faults for dnode blocks because we might trigger the panic in
	 * dbuf_prepare_encrypted_dnode_leaf(), which exists because syncing
	 * context is not prepared to handle malicious decryption failures.
	 */
	if (zio_injection_enabled && !encrypt && ot != DMU_OT_DNODE && ret == 0)
		ret = zio_handle_decrypt_injection(spa, zb, ot, ECKSUM);
	if (ret != 0)
		goto error;

	if (encrypt) {
		abd_return_buf(pabd, plainbuf, datalen);
		abd_return_buf_copy(cabd, cipherbuf, datalen);
	} else {
		abd_return_buf_copy(pabd, plainbuf, datalen);
		abd_return_buf(cabd, cipherbuf, datalen);
	}

	spa_keystore_dsl_key_rele(spa, dck, FTAG);

	return (0);

error:
	/* the buffers are handed back exactly as on the success path */
	if (encrypt) {
		/* zero out any state we might have changed while encrypting */
		bzero(salt, ZIO_DATA_SALT_LEN);
		bzero(iv, ZIO_DATA_IV_LEN);
		bzero(mac, ZIO_DATA_MAC_LEN);
		abd_return_buf(pabd, plainbuf, datalen);
		abd_return_buf_copy(cabd, cipherbuf, datalen);
	} else {
		abd_return_buf_copy(pabd, plainbuf, datalen);
		abd_return_buf(cabd, cipherbuf, datalen);
	}

	spa_keystore_dsl_key_rele(spa, dck, FTAG);

	return (ret);
}

ZFS_MODULE_PARAM(zfs, zfs_, disable_ivset_guid_check, INT, ZMOD_RW,
	"Set to allow raw receives without IVset guids");
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 0f0eab050fa3..1c0042af1858 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -1,963 +1,964 @@ # # This file and its contents are supplied under the terms of
the # Common Development and Distribution License ("CDDL"), version 1.0. # You may only use this file in accordance with the terms of version # 1.0 of the CDDL. # # A full copy of the text of the CDDL should have accompanied this # source. A copy of the CDDL is also available via the Internet at # http://www.illumos.org/license/CDDL. # # This run file contains all of the common functional tests. When # adding a new test consider also adding it to the sanity.run file # if the new test runs to completion in only a few seconds. # # Approximate run time: 4-5 hours # [DEFAULT] pre = setup quiet = False pre_user = root user = root timeout = 600 post_user = root post = cleanup failsafe_user = root failsafe = callbacks/zfs_failsafe outputdir = /var/tmp/test_results tags = ['functional'] [tests/functional/acl/off] tests = ['posixmode'] tags = ['functional', 'acl'] [tests/functional/alloc_class] tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos', 'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos', 'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos', 'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos', 'alloc_class_013_pos'] tags = ['functional', 'alloc_class'] [tests/functional/arc] tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos', 'arcstats_runtime_tuning'] tags = ['functional', 'arc'] [tests/functional/atime] tests = ['atime_001_pos', 'atime_002_neg', 'root_atime_off', 'root_atime_on'] tags = ['functional', 'atime'] [tests/functional/bootfs] tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos', 'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_006_pos', 'bootfs_007_pos', 'bootfs_008_pos'] tags = ['functional', 'bootfs'] [tests/functional/btree] tests = ['btree_positive', 'btree_negative'] tags = ['functional', 'btree'] pre = post = [tests/functional/cache] tests = ['cache_001_pos', 'cache_002_pos', 'cache_003_pos', 'cache_004_neg', 'cache_005_neg', 'cache_006_pos', 
'cache_007_neg', 'cache_008_neg', 'cache_009_pos', 'cache_010_pos', 'cache_011_pos', 'cache_012_pos'] tags = ['functional', 'cache'] [tests/functional/cachefile] tests = ['cachefile_001_pos', 'cachefile_002_pos', 'cachefile_003_pos', 'cachefile_004_pos'] tags = ['functional', 'cachefile'] [tests/functional/casenorm] tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure', 'sensitive_none_lookup', 'sensitive_none_delete', 'sensitive_formd_lookup', 'sensitive_formd_delete', 'insensitive_none_lookup', 'insensitive_none_delete', 'insensitive_formd_lookup', 'insensitive_formd_delete', 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete', 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete'] tags = ['functional', 'casenorm'] [tests/functional/channel_program/lua_core] tests = ['tst.args_to_lua', 'tst.divide_by_zero', 'tst.exists', 'tst.integer_illegal', 'tst.integer_overflow', 'tst.language_functions_neg', 'tst.language_functions_pos', 'tst.large_prog', 'tst.libraries', 'tst.memory_limit', 'tst.nested_neg', 'tst.nested_pos', 'tst.nvlist_to_lua', 'tst.recursive_neg', 'tst.recursive_pos', 'tst.return_large', 'tst.return_nvlist_neg', 'tst.return_nvlist_pos', 'tst.return_recursive_table', 'tst.stack_gsub', 'tst.timeout'] tags = ['functional', 'channel_program', 'lua_core'] [tests/functional/channel_program/synctask_core] tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit', 'tst.get_index_props', 'tst.get_mountpoint', 'tst.get_neg', 'tst.get_number_props', 'tst.get_string_props', 'tst.get_type', 'tst.get_userquota', 'tst.get_written', 'tst.inherit', 'tst.list_bookmarks', 'tst.list_children', 'tst.list_clones', 'tst.list_holds', 'tst.list_snapshots', 'tst.list_system_props', 'tst.list_user_props', 'tst.parse_args_neg','tst.promote_conflict', 'tst.promote_multiple', 'tst.promote_simple', 'tst.rollback_mult', 'tst.rollback_one', 'tst.set_props', 'tst.snapshot_destroy', 'tst.snapshot_neg', 'tst.snapshot_recursive', 
'tst.snapshot_simple', 'tst.bookmark.create', 'tst.bookmark.copy', 'tst.terminate_by_signal' ] tags = ['functional', 'channel_program', 'synctask_core'] [tests/functional/checksum] tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'filetest_001_pos', 'filetest_002_pos'] tags = ['functional', 'checksum'] [tests/functional/clean_mirror] tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos', 'clean_mirror_003_pos', 'clean_mirror_004_pos'] tags = ['functional', 'clean_mirror'] [tests/functional/cli_root/zdb] tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos', 'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos', 'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress', 'zdb_display_block', 'zdb_label_checksum', 'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_objset_id', 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2'] pre = post = tags = ['functional', 'cli_root', 'zdb'] [tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos'] tags = ['functional', 'cli_root', 'zfs'] [tests/functional/cli_root/zfs_bookmark] tests = ['zfs_bookmark_cliargs'] tags = ['functional', 'cli_root', 'zfs_bookmark'] [tests/functional/cli_root/zfs_change-key] tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', 'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones'] tags = ['functional', 'cli_root', 'zfs_change-key'] [tests/functional/cli_root/zfs_clone] tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', 'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', 'zfs_clone_010_pos', 'zfs_clone_encrypted', 'zfs_clone_deeply_nested'] tags = ['functional', 'cli_root', 'zfs_clone'] [tests/functional/cli_root/zfs_copies] tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos', 'zfs_copies_004_neg', 'zfs_copies_005_neg', 'zfs_copies_006_pos'] tags = 
['functional', 'cli_root', 'zfs_copies'] [tests/functional/cli_root/zfs_create] tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', 'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted', 'zfs_create_crypt_combos', 'zfs_create_dryrun', 'zfs_create_nomount', 'zfs_create_verbose'] tags = ['functional', 'cli_root', 'zfs_create'] [tests/functional/cli_root/zfs_destroy] tests = ['zfs_clone_livelist_condense_and_disable', 'zfs_clone_livelist_condense_races', 'zfs_clone_livelist_dedup', 'zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', 'zfs_destroy_004_pos', 'zfs_destroy_005_neg', 'zfs_destroy_006_neg', 'zfs_destroy_007_neg', 'zfs_destroy_008_pos', 'zfs_destroy_009_pos', 'zfs_destroy_010_pos', 'zfs_destroy_011_pos', 'zfs_destroy_012_pos', 'zfs_destroy_013_neg', 'zfs_destroy_014_pos', 'zfs_destroy_015_pos', 'zfs_destroy_016_pos', 'zfs_destroy_clone_livelist', 'zfs_destroy_dev_removal', 'zfs_destroy_dev_removal_condense'] tags = ['functional', 'cli_root', 'zfs_destroy'] [tests/functional/cli_root/zfs_diff] tests = ['zfs_diff_changes', 'zfs_diff_cliargs', 'zfs_diff_timestamp', 'zfs_diff_types', 'zfs_diff_encrypted', 'zfs_diff_mangle'] tags = ['functional', 'cli_root', 'zfs_diff'] [tests/functional/cli_root/zfs_get] tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', 'zfs_get_004_pos', 'zfs_get_005_neg', 'zfs_get_006_neg', 'zfs_get_007_neg', 'zfs_get_008_pos', 'zfs_get_009_pos', 'zfs_get_010_neg'] tags = ['functional', 'cli_root', 'zfs_get'] [tests/functional/cli_root/zfs_ids_to_path] tests = ['zfs_ids_to_path_001_pos'] tags = ['functional', 'cli_root', 'zfs_ids_to_path'] [tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos', 'zfs_inherit_mountpoint'] 
tags = ['functional', 'cli_root', 'zfs_inherit'] [tests/functional/cli_root/zfs_load-key] tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file', 'zfs_load-key_https', 'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive'] tags = ['functional', 'cli_root', 'zfs_load-key'] [tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_pos', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', 'zfs_mount_test_race'] tags = ['functional', 'cli_root', 'zfs_mount'] [tests/functional/cli_root/zfs_program] tests = ['zfs_program_json'] tags = ['functional', 'cli_root', 'zfs_program'] [tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot'] tags = ['functional', 'cli_root', 'zfs_promote'] [tests/functional/cli_root/zfs_property] tests = ['zfs_written_property_001_pos'] tags = ['functional', 'cli_root', 'zfs_property'] [tests/functional/cli_root/zfs_receive] tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_004_neg', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', 'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos', 'zfs_receive_016_pos', 'receive-o-x_props_override', 'receive-o-x_props_aliases', 'zfs_receive_from_encrypted', 'zfs_receive_to_encrypted', 'zfs_receive_raw', 'zfs_receive_raw_incremental', 'zfs_receive_-e', 'zfs_receive_raw_-d', 'zfs_receive_from_zstd', 'zfs_receive_new_props'] tags = 
['functional', 'cli_root', 'zfs_receive'] [tests/functional/cli_root/zfs_rename] tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos', 'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', 'zfs_rename_013_pos', 'zfs_rename_014_neg', 'zfs_rename_encrypted_child', 'zfs_rename_to_encrypted', 'zfs_rename_mountpoint', 'zfs_rename_nounmount'] tags = ['functional', 'cli_root', 'zfs_rename'] [tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] tags = ['functional', 'cli_root', 'zfs_reservation'] [tests/functional/cli_root/zfs_rollback] tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', 'zfs_rollback_003_neg', 'zfs_rollback_004_neg'] tags = ['functional', 'cli_root', 'zfs_rollback'] [tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw', 'zfs_send_sparse', 'zfs_send-b', 'zfs_send_skip_missing'] tags = ['functional', 'cli_root', 'zfs_send'] [tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', 'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos', 'checksum_001_pos', 'compression_001_pos', 'mountpoint_001_pos', 'mountpoint_002_pos', 'reservation_001_neg', 'user_property_002_pos', 'share_mount_001_neg', 'snapdir_001_pos', 'onoffs_001_pos', 'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos', 'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg', 'zfs_set_002_neg', 'zfs_set_003_neg', 'property_alias_001_pos', 'mountpoint_003_pos', 'ro_props_001_pos', 'zfs_set_keylocation', 'zfs_set_feature_activation'] tags = ['functional', 'cli_root', 'zfs_set'] [tests/functional/cli_root/zfs_share] tests = ['zfs_share_001_pos', 
'zfs_share_002_pos', 'zfs_share_003_pos', 'zfs_share_004_pos', 'zfs_share_006_pos', 'zfs_share_008_neg', 'zfs_share_010_neg', 'zfs_share_011_pos', 'zfs_share_concurrent_shares'] tags = ['functional', 'cli_root', 'zfs_share'] [tests/functional/cli_root/zfs_snapshot] tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', 'zfs_snapshot_003_neg', 'zfs_snapshot_004_neg', 'zfs_snapshot_005_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg', 'zfs_snapshot_009_pos'] tags = ['functional', 'cli_root', 'zfs_snapshot'] [tests/functional/cli_root/zfs_unload-key] tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive'] tags = ['functional', 'cli_root', 'zfs_unload-key'] [tests/functional/cli_root/zfs_unmount] tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos', 'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos', 'zfs_unmount_007_neg', 'zfs_unmount_008_neg', 'zfs_unmount_009_pos', 'zfs_unmount_all_001_pos', 'zfs_unmount_nested', 'zfs_unmount_unload_keys'] tags = ['functional', 'cli_root', 'zfs_unmount'] [tests/functional/cli_root/zfs_unshare] tests = ['zfs_unshare_001_pos', 'zfs_unshare_002_pos', 'zfs_unshare_003_pos', 'zfs_unshare_004_neg', 'zfs_unshare_005_neg', 'zfs_unshare_006_pos', 'zfs_unshare_007_pos'] tags = ['functional', 'cli_root', 'zfs_unshare'] [tests/functional/cli_root/zfs_upgrade] tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos', 'zfs_upgrade_004_pos', 'zfs_upgrade_005_pos', 'zfs_upgrade_006_neg', 'zfs_upgrade_007_neg'] tags = ['functional', 'cli_root', 'zfs_upgrade'] [tests/functional/cli_root/zfs_wait] tests = ['zfs_wait_deleteq'] tags = ['functional', 'cli_root', 'zfs_wait'] [tests/functional/cli_root/zhack] tests = ['zhack_label_checksum'] pre = post = tags = ['functional', 'cli_root', 'zhack'] [tests/functional/cli_root/zpool] tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors'] tags = ['functional', 'cli_root', 'zpool'] 
[tests/functional/cli_root/zpool_add] tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos', 'zpool_add_004_pos', 'zpool_add_006_pos', 'zpool_add_007_neg', 'zpool_add_008_neg', 'zpool_add_009_neg', 'zpool_add_010_pos', 'add-o_ashift', 'add_prop_ashift', 'zpool_add_dryrun_output'] tags = ['functional', 'cli_root', 'zpool_add'] [tests/functional/cli_root/zpool_attach] tests = ['zpool_attach_001_neg', 'attach-o_ashift'] tags = ['functional', 'cli_root', 'zpool_attach'] [tests/functional/cli_root/zpool_clear] tests = ['zpool_clear_001_pos', 'zpool_clear_002_neg', 'zpool_clear_003_neg', 'zpool_clear_readonly'] tags = ['functional', 'cli_root', 'zpool_clear'] [tests/functional/cli_root/zpool_create] tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_003_pos', 'zpool_create_004_pos', 'zpool_create_005_pos', 'zpool_create_006_pos', 'zpool_create_007_neg', 'zpool_create_008_pos', 'zpool_create_009_neg', 'zpool_create_010_neg', 'zpool_create_011_neg', 'zpool_create_012_neg', 'zpool_create_014_neg', 'zpool_create_015_neg', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_024_pos', 'zpool_create_encrypted', 'zpool_create_crypt_combos', 'zpool_create_draid_001_pos', 'zpool_create_draid_002_pos', 'zpool_create_draid_003_pos', 'zpool_create_draid_004_pos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', 'zpool_create_features_005_pos', 'zpool_create_features_006_pos', 'zpool_create_features_007_pos', 'zpool_create_features_008_pos', 'zpool_create_features_009_pos', 'create-o_ashift', 'zpool_create_tempname', 'zpool_create_dryrun_output'] tags = ['functional', 'cli_root', 'zpool_create'] [tests/functional/cli_root/zpool_destroy] tests = ['zpool_destroy_001_pos', 'zpool_destroy_002_pos', 'zpool_destroy_003_neg'] pre = post = tags = 
['functional', 'cli_root', 'zpool_destroy'] [tests/functional/cli_root/zpool_detach] tests = ['zpool_detach_001_neg'] tags = ['functional', 'cli_root', 'zpool_detach'] [tests/functional/cli_root/zpool_events] tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow', 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates', 'zpool_events_clear_retained'] tags = ['functional', 'cli_root', 'zpool_events'] [tests/functional/cli_root/zpool_export] tests = ['zpool_export_001_pos', 'zpool_export_002_pos', 'zpool_export_003_neg', 'zpool_export_004_pos'] tags = ['functional', 'cli_root', 'zpool_export'] [tests/functional/cli_root/zpool_get] tests = ['zpool_get_001_pos', 'zpool_get_002_pos', 'zpool_get_003_pos', 'zpool_get_004_neg', 'zpool_get_005_pos'] tags = ['functional', 'cli_root', 'zpool_get'] [tests/functional/cli_root/zpool_history] tests = ['zpool_history_001_neg', 'zpool_history_002_pos'] tags = ['functional', 'cli_root', 'zpool_history'] [tests/functional/cli_root/zpool_import] tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'zpool_import_003_pos', 'zpool_import_004_pos', 'zpool_import_005_pos', 'zpool_import_006_pos', 'zpool_import_007_pos', 'zpool_import_008_pos', 'zpool_import_009_neg', 'zpool_import_010_pos', 'zpool_import_011_neg', 'zpool_import_012_pos', 'zpool_import_013_neg', 'zpool_import_014_pos', 'zpool_import_015_pos', 'zpool_import_016_pos', 'zpool_import_017_pos', 'zpool_import_features_001_pos', 'zpool_import_features_002_neg', 'zpool_import_features_003_pos', 'zpool_import_missing_001_pos', 'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos', 'zpool_import_rename_001_pos', 'zpool_import_all_001_pos', 'zpool_import_encrypted', 'zpool_import_encrypted_load', 'zpool_import_errata3', 'zpool_import_errata4', 'import_cachefile_device_added', 'import_cachefile_device_removed', 'import_cachefile_device_replaced', 'import_cachefile_mirror_attached', 'import_cachefile_mirror_detached', 
'import_cachefile_paths_changed', 'import_cachefile_shared_device', 'import_devices_missing', 'import_paths_changed', 'import_rewind_config_changed', 'import_rewind_device_replaced'] tags = ['functional', 'cli_root', 'zpool_import'] timeout = 1200 [tests/functional/cli_root/zpool_labelclear] tests = ['zpool_labelclear_active', 'zpool_labelclear_exported', 'zpool_labelclear_removed', 'zpool_labelclear_valid'] pre = post = tags = ['functional', 'cli_root', 'zpool_labelclear'] [tests/functional/cli_root/zpool_initialize] tests = ['zpool_initialize_attach_detach_add_remove', 'zpool_initialize_fault_export_import_online', 'zpool_initialize_import_export', 'zpool_initialize_offline_export_import_online', 'zpool_initialize_online_offline', 'zpool_initialize_split', 'zpool_initialize_start_and_cancel_neg', 'zpool_initialize_start_and_cancel_pos', 'zpool_initialize_suspend_resume', 'zpool_initialize_unsupported_vdevs', 'zpool_initialize_verify_checksums', 'zpool_initialize_verify_initialized'] pre = tags = ['functional', 'cli_root', 'zpool_initialize'] [tests/functional/cli_root/zpool_offline] tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg', 'zpool_offline_003_pos'] tags = ['functional', 'cli_root', 'zpool_offline'] [tests/functional/cli_root/zpool_online] tests = ['zpool_online_001_pos', 'zpool_online_002_neg'] tags = ['functional', 'cli_root', 'zpool_online'] [tests/functional/cli_root/zpool_remove] tests = ['zpool_remove_001_neg', 'zpool_remove_002_pos', 'zpool_remove_003_pos'] tags = ['functional', 'cli_root', 'zpool_remove'] [tests/functional/cli_root/zpool_replace] tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift'] tags = ['functional', 'cli_root', 'zpool_replace'] [tests/functional/cli_root/zpool_resilver] tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart'] tags = ['functional', 'cli_root', 'zpool_resilver'] [tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 
'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_encrypted_unloaded', 'zpool_scrub_print_repairing', 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies'] tags = ['functional', 'cli_root', 'zpool_scrub'] [tests/functional/cli_root/zpool_set] tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg', 'zpool_set_ashift', 'zpool_set_features'] tags = ['functional', 'cli_root', 'zpool_set'] [tests/functional/cli_root/zpool_split] tests = ['zpool_split_cliargs', 'zpool_split_devices', 'zpool_split_encryption', 'zpool_split_props', 'zpool_split_vdevs', 'zpool_split_resilver', 'zpool_split_indirect', 'zpool_split_dryrun_output'] tags = ['functional', 'cli_root', 'zpool_split'] [tests/functional/cli_root/zpool_status] tests = ['zpool_status_001_pos', 'zpool_status_002_pos', 'zpool_status_features_001_pos'] tags = ['functional', 'cli_root', 'zpool_status'] [tests/functional/cli_root/zpool_sync] tests = ['zpool_sync_001_pos', 'zpool_sync_002_neg'] tags = ['functional', 'cli_root', 'zpool_sync'] [tests/functional/cli_root/zpool_trim] tests = ['zpool_trim_attach_detach_add_remove', 'zpool_trim_fault_export_import_online', 'zpool_trim_import_export', 'zpool_trim_multiple', 'zpool_trim_neg', 'zpool_trim_offline_export_import_online', 'zpool_trim_online_offline', 'zpool_trim_partial', 'zpool_trim_rate', 'zpool_trim_rate_neg', 'zpool_trim_secure', 'zpool_trim_split', 'zpool_trim_start_and_cancel_neg', 'zpool_trim_start_and_cancel_pos', 'zpool_trim_suspend_resume', 'zpool_trim_unsupported_vdevs', 'zpool_trim_verify_checksums', 'zpool_trim_verify_trimmed'] tags = ['functional', 'zpool_trim'] [tests/functional/cli_root/zpool_upgrade] tests = ['zpool_upgrade_001_pos', 'zpool_upgrade_002_pos', 'zpool_upgrade_003_pos', 'zpool_upgrade_004_pos', 'zpool_upgrade_005_neg', 'zpool_upgrade_006_neg', 'zpool_upgrade_007_pos', 'zpool_upgrade_008_pos', 'zpool_upgrade_009_neg', 'zpool_upgrade_features_001_pos'] tags = ['functional', 'cli_root', 
'zpool_upgrade'] [tests/functional/cli_root/zpool_wait] tests = ['zpool_wait_discard', 'zpool_wait_freeing', 'zpool_wait_initialize_basic', 'zpool_wait_initialize_cancel', 'zpool_wait_initialize_flag', 'zpool_wait_multiple', 'zpool_wait_no_activity', 'zpool_wait_remove', 'zpool_wait_remove_cancel', 'zpool_wait_trim_basic', 'zpool_wait_trim_cancel', 'zpool_wait_trim_flag', 'zpool_wait_usage'] tags = ['functional', 'cli_root', 'zpool_wait'] [tests/functional/cli_root/zpool_wait/scan] tests = ['zpool_wait_replace_cancel', 'zpool_wait_rebuild', 'zpool_wait_resilver', 'zpool_wait_scrub_cancel', 'zpool_wait_replace', 'zpool_wait_scrub_basic', 'zpool_wait_scrub_flag'] tags = ['functional', 'cli_root', 'zpool_wait'] [tests/functional/cli_user/misc] tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg', 'zfs_clone_001_neg', 'zfs_create_001_neg', 'zfs_destroy_001_neg', 'zfs_get_001_neg', 'zfs_inherit_001_neg', 'zfs_mount_001_neg', 'zfs_promote_001_neg', 'zfs_receive_001_neg', 'zfs_rename_001_neg', 'zfs_rollback_001_neg', 'zfs_send_001_neg', 'zfs_set_001_neg', 'zfs_share_001_neg', 'zfs_snapshot_001_neg', 'zfs_unallow_001_neg', 'zfs_unmount_001_neg', 'zfs_unshare_001_neg', 'zfs_upgrade_001_neg', 'zpool_001_neg', 'zpool_add_001_neg', 'zpool_attach_001_neg', 'zpool_clear_001_neg', 'zpool_create_001_neg', 'zpool_destroy_001_neg', 'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg', 'zpool_history_001_neg', 'zpool_import_001_neg', 'zpool_import_002_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg', 'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg', 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos', 'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege'] user = tags = ['functional', 'cli_user', 'misc'] [tests/functional/cli_user/zfs_list] tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos', 'zfs_list_004_neg', 'zfs_list_007_pos', 'zfs_list_008_neg'] user = tags = 
['functional', 'cli_user', 'zfs_list'] [tests/functional/cli_user/zpool_iostat] tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos', 'zpool_iostat_003_neg', 'zpool_iostat_004_pos', 'zpool_iostat_005_pos', 'zpool_iostat_-c_disable', 'zpool_iostat_-c_homedir', 'zpool_iostat_-c_searchpath'] user = tags = ['functional', 'cli_user', 'zpool_iostat'] [tests/functional/cli_user/zpool_list] tests = ['zpool_list_001_pos', 'zpool_list_002_neg'] user = tags = ['functional', 'cli_user', 'zpool_list'] [tests/functional/cli_user/zpool_status] tests = ['zpool_status_003_pos', 'zpool_status_-c_disable', 'zpool_status_-c_homedir', 'zpool_status_-c_searchpath'] user = tags = ['functional', 'cli_user', 'zpool_status'] [tests/functional/compression] tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', 'l2arc_compressed_arc', 'l2arc_compressed_arc_disabled', 'l2arc_encrypted', 'l2arc_encrypted_no_compressed_arc'] tags = ['functional', 'compression'] [tests/functional/cp_files] tests = ['cp_files_001_pos'] tags = ['functional', 'cp_files'] [tests/functional/crtime] tests = ['crtime_001_pos' ] tags = ['functional', 'crtime'] [tests/functional/ctime] tests = ['ctime_001_pos' ] tags = ['functional', 'ctime'] [tests/functional/deadman] tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio'] pre = post = tags = ['functional', 'deadman'] [tests/functional/delegate] tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos', 'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos', 'zfs_allow_007_pos', 'zfs_allow_008_pos', 'zfs_allow_009_neg', 'zfs_allow_010_pos', 'zfs_allow_011_neg', 'zfs_allow_012_neg', 'zfs_unallow_001_pos', 'zfs_unallow_002_pos', 'zfs_unallow_003_pos', 'zfs_unallow_004_pos', 'zfs_unallow_005_pos', 'zfs_unallow_006_pos', 'zfs_unallow_007_neg', 'zfs_unallow_008_neg'] tags = ['functional', 'delegate'] [tests/functional/exec] tests = ['exec_001_pos', 'exec_002_neg'] tags = ['functional', 'exec'] [tests/functional/fallocate] tests = 
['fallocate_punch-hole'] tags = ['functional', 'fallocate'] [tests/functional/features/async_destroy] tests = ['async_destroy_001_pos'] tags = ['functional', 'features', 'async_destroy'] [tests/functional/features/large_dnode] tests = ['large_dnode_001_pos', 'large_dnode_003_pos', 'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_007_neg', 'large_dnode_009_pos'] tags = ['functional', 'features', 'large_dnode'] [tests/functional/grow] pre = post = tests = ['grow_pool_001_pos', 'grow_replicas_001_pos'] tags = ['functional', 'grow'] [tests/functional/history] tests = ['history_001_pos', 'history_002_pos', 'history_003_pos', 'history_004_pos', 'history_005_neg', 'history_006_neg', 'history_007_pos', 'history_008_pos', 'history_009_pos', 'history_010_pos'] tags = ['functional', 'history'] [tests/functional/hkdf] tests = ['run_hkdf_test'] tags = ['functional', 'hkdf'] [tests/functional/inheritance] tests = ['inherit_001_pos'] pre = tags = ['functional', 'inheritance'] [tests/functional/io] tests = ['sync', 'psync', 'posixaio', 'mmap'] tags = ['functional', 'io'] [tests/functional/inuse] tests = ['inuse_004_pos', 'inuse_005_pos', 'inuse_008_pos', 'inuse_009_pos'] post = tags = ['functional', 'inuse'] [tests/functional/large_files] tests = ['large_files_001_pos', 'large_files_002_pos'] tags = ['functional', 'large_files'] [tests/functional/largest_pool] tests = ['largest_pool_001_pos'] pre = post = tags = ['functional', 'largest_pool'] [tests/functional/limits] tests = ['filesystem_count', 'filesystem_limit', 'snapshot_count', 'snapshot_limit'] tags = ['functional', 'limits'] [tests/functional/link_count] tests = ['link_count_001', 'link_count_root_inode'] tags = ['functional', 'link_count'] [tests/functional/migration] tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos', 'migration_004_pos', 'migration_005_pos', 'migration_006_pos', 'migration_007_pos', 'migration_008_pos', 'migration_009_pos', 'migration_010_pos', 'migration_011_pos', 
'migration_012_pos'] tags = ['functional', 'migration'] [tests/functional/mmap] tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_seek_001_pos'] tags = ['functional', 'mmap'] [tests/functional/mount] tests = ['umount_001', 'umountall_001'] tags = ['functional', 'mount'] [tests/functional/mv_files] tests = ['mv_files_001_pos', 'mv_files_002_pos', 'random_creation'] tags = ['functional', 'mv_files'] [tests/functional/nestedfs] tests = ['nestedfs_001_pos'] tags = ['functional', 'nestedfs'] [tests/functional/no_space] tests = ['enospc_001_pos', 'enospc_002_pos', 'enospc_003_pos', 'enospc_df'] tags = ['functional', 'no_space'] [tests/functional/nopwrite] tests = ['nopwrite_copies', 'nopwrite_mtime', 'nopwrite_negative', 'nopwrite_promoted_clone', 'nopwrite_recsize', 'nopwrite_sync', 'nopwrite_varying_compression', 'nopwrite_volume'] tags = ['functional', 'nopwrite'] [tests/functional/online_offline] tests = ['online_offline_001_pos', 'online_offline_002_neg', 'online_offline_003_neg'] tags = ['functional', 'online_offline'] [tests/functional/pool_checkpoint] tests = ['checkpoint_after_rewind', 'checkpoint_big_rewind', 'checkpoint_capacity', 'checkpoint_conf_change', 'checkpoint_discard', 'checkpoint_discard_busy', 'checkpoint_discard_many', 'checkpoint_indirect', 'checkpoint_invalid', 'checkpoint_lun_expsz', 'checkpoint_open', 'checkpoint_removal', 'checkpoint_rewind', 'checkpoint_ro_rewind', 'checkpoint_sm_scale', 'checkpoint_twice', 'checkpoint_vdev_add', 'checkpoint_zdb', 'checkpoint_zhack_feat'] tags = ['functional', 'pool_checkpoint'] timeout = 1800 [tests/functional/pool_names] tests = ['pool_names_001_pos', 'pool_names_002_neg'] pre = post = tags = ['functional', 'pool_names'] [tests/functional/poolversion] tests = ['poolversion_001_pos', 'poolversion_002_pos'] tags = ['functional', 'poolversion'] [tests/functional/pyzfs] tests = ['pyzfs_unittest'] pre = post = tags = ['functional', 'pyzfs'] [tests/functional/quota] tests = ['quota_001_pos', 
'quota_002_pos', 'quota_003_pos', 'quota_004_pos', 'quota_005_pos', 'quota_006_neg'] tags = ['functional', 'quota'] [tests/functional/redacted_send] tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted', 'redacted_disabled_feature', 'redacted_embedded', 'redacted_holes', 'redacted_incrementals', 'redacted_largeblocks', 'redacted_many_clones', 'redacted_mixed_recsize', 'redacted_mounts', 'redacted_negative', 'redacted_origin', 'redacted_panic', 'redacted_props', 'redacted_resume', 'redacted_size', 'redacted_volume'] tags = ['functional', 'redacted_send'] [tests/functional/raidz] tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos'] tags = ['functional', 'raidz'] [tests/functional/redundancy] tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2', 'redundancy_draid3', 'redundancy_draid_damaged', 'redundancy_draid_spare1', 'redundancy_draid_spare2', 'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz', 'redundancy_raidz1', 'redundancy_raidz2', 'redundancy_raidz3', 'redundancy_stripe'] tags = ['functional', 'redundancy'] timeout = 1200 [tests/functional/refquota] tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos', 'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg', 'refquota_007_neg', 'refquota_008_neg'] tags = ['functional', 'refquota'] [tests/functional/refreserv] tests = ['refreserv_001_pos', 'refreserv_002_pos', 'refreserv_003_pos', 'refreserv_004_pos', 'refreserv_005_pos', 'refreserv_multi_raidz', 'refreserv_raidz'] tags = ['functional', 'refreserv'] [tests/functional/removal] pre = tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space', 'removal_condense_export', 'removal_multiple_indirection', 'removal_nopwrite', 'removal_remap_deadlists', 'removal_resume_export', 'removal_sanity', 'removal_with_add', 'removal_with_create_fs', 'removal_with_dedup', 'removal_with_errors', 'removal_with_export', 'removal_with_ganging', 'removal_with_faulted', 
'removal_with_remove', 'removal_with_scrub', 'removal_with_send', 'removal_with_send_recv', 'removal_with_snapshot', 'removal_with_write', 'removal_with_zdb', 'remove_expanded', 'remove_mirror', 'remove_mirror_sanity', 'remove_raidz', 'remove_indirect', 'remove_attach_mirror'] tags = ['functional', 'removal'] [tests/functional/rename_dirs] tests = ['rename_dirs_001_pos'] tags = ['functional', 'rename_dirs'] [tests/functional/replacement] tests = ['attach_import', 'attach_multiple', 'attach_rebuild', 'attach_resilver', 'detach', 'rebuild_disabled_feature', 'rebuild_multiple', 'rebuild_raidz', 'replace_import', 'replace_rebuild', 'replace_resilver', 'resilver_restart_001', 'resilver_restart_002', 'scrub_cancel'] tags = ['functional', 'replacement'] [tests/functional/reservation] tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos', 'reservation_004_pos', 'reservation_005_pos', 'reservation_006_pos', 'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos', 'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos', 'reservation_013_pos', 'reservation_014_pos', 'reservation_015_pos', 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos', 'reservation_019_pos', 'reservation_020_pos', 'reservation_021_neg', 'reservation_022_pos'] tags = ['functional', 'reservation'] [tests/functional/rootpool] tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos'] tags = ['functional', 'rootpool'] [tests/functional/rsend] tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'rsend_005_pos', 'rsend_006_pos', 'rsend_007_pos', 'rsend_008_pos', 'rsend_009_pos', 'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos', 'rsend_013_pos', 'rsend_014_pos', 'rsend_016_neg', 'rsend_019_pos', 'rsend_020_pos', 'rsend_021_pos', 'rsend_022_pos', 'rsend_024_pos', 'send-c_verify_ratio', 'send-c_verify_contents', 'send-c_props', 'send-c_incremental', 'send-c_volume', 
'send-c_zstreamdump', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', 'send-c_recv_dedup', 'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props', 'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files', 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds', 'send_hole_birth', 'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset', 'send_invalid', 'send_doall'] tags = ['functional', 'rsend'] [tests/functional/scrub_mirror] tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', 'scrub_mirror_003_pos', 'scrub_mirror_004_pos'] tags = ['functional', 'scrub_mirror'] [tests/functional/slog] tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001', 'slog_replay_fs_002', 'slog_replay_volume'] tags = ['functional', 'slog'] [tests/functional/snapshot] tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos', 'rollback_003_pos', 'snapshot_001_pos', 'snapshot_002_pos', 'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos', 'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos', 'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos', 'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos', 'snapshot_017_pos'] tags = ['functional', 'snapshot'] [tests/functional/snapused] tests = ['snapused_001_pos', 'snapused_002_pos', 'snapused_003_pos', 'snapused_004_pos', 'snapused_005_pos'] tags = ['functional', 'snapused'] [tests/functional/sparse] tests = ['sparse_001_pos'] tags = ['functional', 'sparse'] [tests/functional/stat] tests = ['stat_001_pos'] tags = ['functional', 'stat'] [tests/functional/suid] tests = ['suid_write_to_suid', 'suid_write_to_sgid', 
'suid_write_to_suid_sgid', 'suid_write_to_none'] tags = ['functional', 'suid'] [tests/functional/threadsappend] tests = ['threadsappend_001_pos'] tags = ['functional', 'threadsappend'] [tests/functional/trim] tests = ['autotrim_integrity', 'autotrim_config', 'autotrim_trim_integrity', 'trim_integrity', 'trim_config', 'trim_l2arc'] tags = ['functional', 'trim'] [tests/functional/truncate] tests = ['truncate_001_pos', 'truncate_002_pos', 'truncate_timestamps'] tags = ['functional', 'truncate'] [tests/functional/upgrade] tests = ['upgrade_userobj_001_pos', 'upgrade_readonly_pool'] tags = ['functional', 'upgrade'] [tests/functional/userquota] tests = [ 'userquota_001_pos', 'userquota_002_pos', 'userquota_003_pos', 'userquota_004_pos', 'userquota_005_neg', 'userquota_006_pos', 'userquota_007_pos', 'userquota_008_pos', 'userquota_009_pos', 'userquota_010_pos', 'userquota_011_pos', 'userquota_012_neg', - 'userspace_001_pos', 'userspace_002_pos', 'userspace_encrypted'] + 'userspace_001_pos', 'userspace_002_pos', 'userspace_encrypted', + 'userspace_send_encrypted'] tags = ['functional', 'userquota'] [tests/functional/vdev_zaps] tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos', 'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos', 'vdev_zaps_007_pos'] tags = ['functional', 'vdev_zaps'] [tests/functional/write_dirs] tests = ['write_dirs_001_pos', 'write_dirs_002_pos'] tags = ['functional', 'write_dirs'] [tests/functional/xattr] tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos', 'xattr_005_pos', 'xattr_006_pos', 'xattr_007_neg', 'xattr_011_pos', 'xattr_012_pos', 'xattr_013_pos'] tags = ['functional', 'xattr'] [tests/functional/zvol/zvol_ENOSPC] tests = ['zvol_ENOSPC_001_pos'] tags = ['functional', 'zvol', 'zvol_ENOSPC'] [tests/functional/zvol/zvol_cli] tests = ['zvol_cli_001_pos', 'zvol_cli_002_pos', 'zvol_cli_003_neg'] tags = ['functional', 'zvol', 'zvol_cli'] [tests/functional/zvol/zvol_misc] tests = 
['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse', 'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil'] tags = ['functional', 'zvol', 'zvol_misc'] [tests/functional/zvol/zvol_swap] tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos'] tags = ['functional', 'zvol', 'zvol_swap'] [tests/functional/libzfs] tests = ['many_fds', 'libzfs_input'] tags = ['functional', 'libzfs'] [tests/functional/log_spacemap] tests = ['log_spacemap_import_logs'] pre = post = tags = ['functional', 'log_spacemap'] [tests/functional/l2arc] tests = ['l2arc_arcstats_pos', 'l2arc_mfuonly_pos', 'l2arc_l2miss_pos', 'persist_l2arc_001_pos', 'persist_l2arc_002_pos', 'persist_l2arc_003_neg', 'persist_l2arc_004_pos', 'persist_l2arc_005_pos'] tags = ['functional', 'l2arc'] [tests/functional/zpool_influxdb] tests = ['zpool_influxdb'] tags = ['functional', 'zpool_influxdb'] diff --git a/tests/zfs-tests/tests/functional/userquota/Makefile.am b/tests/zfs-tests/tests/functional/userquota/Makefile.am index 9100e4adadca..2c94d3e1521c 100644 --- a/tests/zfs-tests/tests/functional/userquota/Makefile.am +++ b/tests/zfs-tests/tests/functional/userquota/Makefile.am @@ -1,28 +1,29 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/userquota dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ groupspace_001_pos.ksh \ groupspace_002_pos.ksh \ groupspace_003_pos.ksh \ userquota_001_pos.ksh \ userquota_002_pos.ksh \ userquota_003_pos.ksh \ userquota_004_pos.ksh \ userquota_005_neg.ksh \ userquota_006_pos.ksh \ userquota_007_pos.ksh \ userquota_008_pos.ksh \ userquota_009_pos.ksh \ userquota_010_pos.ksh \ userquota_011_pos.ksh \ userquota_012_neg.ksh \ userquota_013_pos.ksh \ userspace_001_pos.ksh \ userspace_002_pos.ksh \ userspace_003_pos.ksh \ - userspace_encrypted.ksh + userspace_encrypted.ksh \ + userspace_send_encrypted.ksh dist_pkgdata_DATA = \ userquota.cfg \ userquota_common.kshlib diff --git 
a/tests/zfs-tests/tests/functional/userquota/userspace_send_encrypted.ksh b/tests/zfs-tests/tests/functional/userquota/userspace_send_encrypted.ksh
new file mode 100755
index 000000000000..e9ef0c4262e7
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/userquota/userspace_send_encrypted.ksh
@@ -0,0 +1,139 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021, George Amanakis . All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/userquota/userquota_common.kshlib
+
+#
+# DESCRIPTION:
+# Sending raw encrypted datasets back to the source dataset succeeds.
+#
+#
+# STRATEGY:
+# 1. Create encrypted source dataset, set userquota and write a file
+# 2. Create base snapshot
+# 3. Write a second file and create a second snapshot
+# 4. Raw send both snapshots to the target dataset
+# 5. Destroy latest snapshot at source, rollback and record the
+#    user's space usage
+# 6. Unmount, unload key from source
+# 7. Raw send latest snapshot back to source
+# 8. Load keys and mount both source and target datasets
+# 9. Verify 'zfs userspace' reports the same usage and quota for the
+#    user on both datasets, and that the source usage differs from the
+#    value recorded after the rollback
+#
+
+FILEDEV="$TEST_BASE_DIR/userspace_encrypted"
+POOLNAME="testpool$$"
+ENC_SOURCE="$POOLNAME/source"
+ENC_TARGET="$POOLNAME/target"
+
+# Destroy the scratch pool and remove its backing file.
+function cleanup
+{
+	destroy_pool $POOLNAME
+	rm -f $FILEDEV
+}
+
+# Print the space used by $QUSER1 on dataset $1, in whole MiB.
+function user_used_mb
+{
+	zfs userspace -Hp $1 | \
+	    awk "/$QUSER1/"' {printf "%d\n", $4 / 1024 / 1024}'
+}
+
+# Print the quota field 'zfs userspace -Hp' reports for $QUSER1 on $1.
+function user_quota
+{
+	zfs userspace -Hp $1 | awk "/$QUSER1/"' {print $5}'
+}
+
+log_onexit cleanup
+
+log_assert "Sending raw encrypted datasets back to the source dataset succeeds."
+
+# Setup pool and create source
+log_must truncate -s 200m $FILEDEV
+log_must zpool create -o feature@encryption=enabled $POOLNAME \
+    $FILEDEV
+log_must eval "echo 'password' | zfs create -o encryption=on" \
+    "-o keyformat=passphrase -o keylocation=prompt " \
+    "$ENC_SOURCE"
+
+# Set user quota and write file
+log_must zfs set userquota@$QUSER1=50m $ENC_SOURCE
+mkmount_writable $ENC_SOURCE
+mntpnt=$(get_prop mountpoint $ENC_SOURCE)
+log_must user_run $QUSER1 mkfile 10m $mntpnt/file1
+sync
+
+# Snapshot
+log_must zfs snap $ENC_SOURCE@base
+
+# Write a second file and snapshot again
+log_must user_run $QUSER1 mkfile 20m $mntpnt/file2
+log_must zfs snap $ENC_SOURCE@s1
+
+# Raw send both snapshots
+log_must eval "zfs send -w $ENC_SOURCE@base | zfs recv " \
+    "$ENC_TARGET"
+log_must eval "zfs send -w -i @base $ENC_SOURCE@s1 | zfs recv " \
+    "$ENC_TARGET"
+
+# Destroy latest snapshot at source, rollback and record the usage
+log_must zfs destroy $ENC_SOURCE@s1
+log_must zfs rollback $ENC_SOURCE@base
+rollback_uspace=$(user_used_mb $ENC_SOURCE)
+log_must test -n "$rollback_uspace"
+
+# Unmount and unload only the source key, leaving other datasets alone
+log_must zfs umount $ENC_SOURCE
+log_must zfs unload-key $ENC_SOURCE
+
+# Raw send latest snapshot back to source
+log_must eval "zfs send -w -i @base $ENC_TARGET@s1 | zfs recv " \
+    "$ENC_SOURCE"
+
+# Load the keys and mount both encrypted datasets
+log_must eval "echo 'password' | zfs load-key $ENC_SOURCE"
+log_must eval "echo 'password' | zfs load-key $ENC_TARGET"
+log_must zfs mount $ENC_SOURCE
+log_must zfs mount $ENC_TARGET
+sync
+
+# NOTE(review): presumably gives space accounting time to settle after
+# mounting before it is read back - confirm.
+sleep 5
+
+# Usage must match on both sides and differ from the post-rollback value
+src_uspace=$(user_used_mb $ENC_SOURCE)
+tgt_uspace=$(user_used_mb $ENC_TARGET)
+log_must test -n "$src_uspace"
+log_must test -n "$tgt_uspace"
+log_must test "$src_uspace" -eq "$tgt_uspace"
+log_must test "$rollback_uspace" -ne "$src_uspace"
+
+# The user quota must match on both sides as well
+src_uquota=$(user_quota $ENC_SOURCE)
+tgt_uquota=$(user_quota $ENC_TARGET)
+log_must test -n "$src_uquota"
+log_must test "$src_uquota" -eq "$tgt_uquota"
+
+# Cleanup
+cleanup
+
+log_pass "Sending raw encrypted datasets back to the source dataset succeeds."