diff --git a/cmd/zstream/Makefile.am b/cmd/zstream/Makefile.am index 9b2716ae0391..9ae33179e5d6 100644 --- a/cmd/zstream/Makefile.am +++ b/cmd/zstream/Makefile.am @@ -1,18 +1,20 @@ sbin_PROGRAMS += zstream CPPCHECKTARGETS += zstream zstream_SOURCES = \ %D%/zstream.c \ %D%/zstream.h \ + %D%/zstream_decompress.c \ %D%/zstream_dump.c \ %D%/zstream_redup.c \ %D%/zstream_token.c zstream_LDADD = \ libzfs.la \ libzfs_core.la \ + libzpool.la \ libnvpair.la PHONY += install-exec-hook install-exec-hook: cd $(DESTDIR)$(sbindir) && $(LN_S) -f zstream zstreamdump diff --git a/cmd/zstream/zstream.c b/cmd/zstream/zstream.c index a228f45fad79..eeceba2475ca 100644 --- a/cmd/zstream/zstream.c +++ b/cmd/zstream/zstream.c @@ -1,71 +1,75 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. * * CDDL HEADER END */ /* * Copyright (c) 2020 by Delphix. All rights reserved. * Copyright (c) 2020 by Datto Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include "zstream.h" void zstream_usage(void) { (void) fprintf(stderr, "usage: zstream command args ...\n" "Available commands are:\n" "\n" "\tzstream dump [-vCd] FILE\n" "\t... | zstream dump [-vCd]\n" "\n" + "\tzstream decompress [-v] [OBJECT,OFFSET[,TYPE]] ...\n" + "\n" "\tzstream token resume_token\n" "\n" "\tzstream redup [-v] FILE | ...\n"); exit(1); } int main(int argc, char *argv[]) { char *basename = strrchr(argv[0], '/'); basename = basename ? 
(basename + 1) : argv[0]; if (argc >= 1 && strcmp(basename, "zstreamdump") == 0) return (zstream_do_dump(argc, argv)); if (argc < 2) zstream_usage(); char *subcommand = argv[1]; if (strcmp(subcommand, "dump") == 0) { return (zstream_do_dump(argc - 1, argv + 1)); + } else if (strcmp(subcommand, "decompress") == 0) { + return (zstream_do_decompress(argc - 1, argv + 1)); } else if (strcmp(subcommand, "token") == 0) { return (zstream_do_token(argc - 1, argv + 1)); } else if (strcmp(subcommand, "redup") == 0) { return (zstream_do_redup(argc - 1, argv + 1)); } else { zstream_usage(); } } diff --git a/cmd/zstream/zstream.h b/cmd/zstream/zstream.h index 319fecb2876b..931d4e13fec0 100644 --- a/cmd/zstream/zstream.h +++ b/cmd/zstream/zstream.h @@ -1,36 +1,40 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. * * CDDL HEADER END */ /* * Copyright (c) 2020 by Delphix. All rights reserved. 
*/
 
 #ifndef _ZSTREAM_H
 #define _ZSTREAM_H
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+extern void *safe_calloc(size_t n);
+extern int sfread(void *buf, size_t size, FILE *fp);
+extern void *safe_malloc(size_t size);
 extern int zstream_do_redup(int, char *[]);
 extern int zstream_do_dump(int, char *[]);
+extern int zstream_do_decompress(int argc, char *argv[]);
 extern int zstream_do_token(int, char *[]);
 extern void zstream_usage(void);
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif /* _ZSTREAM_H */
diff --git a/cmd/zstream/zstream_decompress.c b/cmd/zstream/zstream_decompress.c
new file mode 100644
index 000000000000..4c924e0e10a0
--- /dev/null
+++ b/cmd/zstream/zstream_decompress.c
@@ -0,0 +1,430 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2022 Axcient. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <search.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zio_checksum.h>
+#include <sys/zstd/zstd.h>
+#include "zfs_fletcher.h"
+#include "zstream.h"
+
+/*
+ * Write all len bytes at data to fd, retrying after short writes and
+ * EINTR. Returns 0 on success or the errno value of the failed write(2).
+ */
+static int
+write_all(int fd, const void *data, size_t len)
+{
+	const char *p = data;
+
+	while (len > 0) {
+		ssize_t n = write(fd, p, len);
+
+		if (n == -1) {
+			if (errno == EINTR)
+				continue;
+			return (errno);
+		}
+		p += n;
+		len -= (size_t)n;
+	}
+	return (0);
+}
+
+/*
+ * Make sure the payload buffer can hold need bytes, growing it when
+ * necessary. Returns the (possibly moved) buffer and updates *bufsz;
+ * exits if the size does not fit in a size_t or the allocation fails.
+ */
+static char *
+ensure_bufsz(char *buf, size_t *bufsz, uint64_t need)
+{
+	char *nbuf;
+
+	if (need <= *bufsz)
+		return (buf);
+	if ((uint64_t)(size_t)need != need)
+		errx(1, "record payload of %llu bytes is too large",
+		    (u_longlong_t)need);
+	nbuf = realloc(buf, (size_t)need);
+	if (nbuf == NULL)
+		err(1, "realloc");
+	*bufsz = (size_t)need;
+	return (nbuf);
+}
+
+/*
+ * Write one replay record, and its payload if any, to outfd while folding
+ * everything written into the running stream checksum *zc. For every
+ * record type except DRR_BEGIN the trailing checksum field of the header
+ * must be zero on entry; it is set to the running checksum computed over
+ * the stream so far, including the part of this header that precedes it.
+ * Returns 0 on success or an errno value if the write failed.
+ */
+static int
+dump_record(dmu_replay_record_t *drr, void *payload, size_t payload_len,
+    zio_cksum_t *zc, int outfd)
+{
+	int error;
+
+	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
+	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	fletcher_4_incremental_native(drr,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+	if (drr->drr_type != DRR_BEGIN) {
+		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+		    drr_checksum.drr_checksum));
+		drr->drr_u.drr_checksum.drr_checksum = *zc;
+	}
+	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), zc);
+	error = write_all(outfd, drr, sizeof (*drr));
+	if (error != 0)
+		return (error);
+	if (payload_len != 0) {
+		fletcher_4_incremental_native(payload, payload_len, zc);
+		error = write_all(outfd, payload, payload_len);
+	}
+	return (error);
+}
+
+/*
+ * Implements "zstream decompress [-v] [OBJECT,OFFSET[,TYPE]] ...".
+ *
+ * Copies a send stream from stdin to stdout. The payload of every WRITE
+ * record whose object and offset were named on the command line is run
+ * through the decompressor for TYPE (lz4 when TYPE is omitted); if that
+ * fails the payload is passed through unchanged. Record checksums and the
+ * END checksum are recomputed for the rewritten stream.
+ *
+ * Returns 0 on success and 1 if the stream could not be written; exits on
+ * usage errors and on unreadable or truncated input.
+ */
+int
+zstream_do_decompress(int argc, char *argv[])
+{
+	enum { KEYSIZE = 64 };
+	size_t bufsz = SPA_MAXBLOCKSIZE;
+	char *buf = safe_malloc(bufsz);
+	dmu_replay_record_t thedrr;
+	dmu_replay_record_t *drr = &thedrr;
+	/* Zeroed up front in case the input does not start with BEGIN. */
+	zio_cksum_t stream_cksum = { { 0 } };
+	int c;
+	int ret = 0;
+	boolean_t verbose = B_FALSE;
+
+	while ((c = getopt(argc, argv, "v")) != -1) {
+		switch (c) {
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			zstream_usage();
+			break;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (hcreate(argc) == 0)
+		errx(1, "hcreate");
+	for (int i = 0; i < argc; i++) {
+		uint64_t object, offset;
+		char *obj_str;
+		char *offset_str;
+		char *key;
+		char *end;
+		enum zio_compress type = ZIO_COMPRESS_LZ4;
+
+		obj_str = strsep(&argv[i], ",");
+		if (argv[i] == NULL) {
+			zstream_usage();
+			exit(2);
+		}
+		errno = 0;
+		object = strtoull(obj_str, &end, 0);
+		if (errno || end == obj_str || *end != '\0')
+			errx(1, "invalid value for object");
+		offset_str = strsep(&argv[i], ",");
+		/* strtoull() only sets errno on failure; clear it again. */
+		errno = 0;
+		offset = strtoull(offset_str, &end, 0);
+		if (errno || end == offset_str || *end != '\0')
+			errx(1, "invalid value for offset");
+		if (argv[i]) {
+			if (0 == strcmp("lz4", argv[i]))
+				type = ZIO_COMPRESS_LZ4;
+			else if (0 == strcmp("lzjb", argv[i]))
+				type = ZIO_COMPRESS_LZJB;
+			else if (0 == strcmp("gzip", argv[i]))
+				type = ZIO_COMPRESS_GZIP_1;
+			else if (0 == strcmp("zle", argv[i]))
+				type = ZIO_COMPRESS_ZLE;
+			else if (0 == strcmp("zstd", argv[i]))
+				type = ZIO_COMPRESS_ZSTD;
+			else {
+				fprintf(stderr, "Invalid compression type %s.\n"
+				    "Supported types are lz4, lzjb, gzip, zle, "
+				    "and zstd\n",
+				    argv[i]);
+				exit(2);
+			}
+		}
+
+		if (asprintf(&key, "%llu,%llu", (u_longlong_t)object,
+		    (u_longlong_t)offset) < 0) {
+			err(1, "asprintf");
+		}
+		ENTRY e = {.key = key};
+		ENTRY *p;
+
+		p = hsearch(e, ENTER);
+		if (p == NULL)
+			errx(1, "hsearch");
+		/* Stash the type in the pointer; read back via intptr_t. */
+		p->data = (void *)(intptr_t)type;
+	}
+
+	if (isatty(STDIN_FILENO)) {
+		(void) fprintf(stderr,
+		    "Error: The send stream is a binary format "
+		    "and can not be read from a\n"
+		    "terminal. Standard input must be redirected.\n");
+		exit(1);
+	}
+
+	fletcher_4_init();
+	while (sfread(drr, sizeof (*drr), stdin) != 0) {
+		struct drr_write *drrw;
+		uint64_t payload_size = 0;
+		int error;
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+		{
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+
+			payload_size = drr->drr_payloadlen;
+			if (payload_size != 0) {
+				buf = ensure_bufsz(buf, &bufsz, payload_size);
+				(void) sfread(buf, payload_size, stdin);
+			}
+			break;
+		}
+		case DRR_END:
+		{
+			struct drr_end *drre = &drr->drr_u.drr_end;
+			/*
+			 * Use the recalculated checksum, unless this is
+			 * the END record of a stream package, which has
+			 * no checksum.
+			 */
+			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
+				drre->drr_checksum = stream_cksum;
+			break;
+		}
+
+		case DRR_OBJECT:
+		{
+			struct drr_object *drro = &drr->drr_u.drr_object;
+
+			if (drro->drr_bonuslen > 0) {
+				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+				buf = ensure_bufsz(buf, &bufsz, payload_size);
+				(void) sfread(buf, payload_size, stdin);
+			}
+			break;
+		}
+
+		case DRR_SPILL:
+		{
+			struct drr_spill *drrs = &drr->drr_u.drr_spill;
+			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
+			buf = ensure_bufsz(buf, &bufsz, payload_size);
+			(void) sfread(buf, payload_size, stdin);
+			break;
+		}
+
+		case DRR_WRITE_BYREF:
+			fprintf(stderr,
+			    "Deduplicated streams are not supported\n");
+			exit(1);
+			break;
+
+		case DRR_WRITE:
+		{
+			drrw = &thedrr.drr_u.drr_write;
+			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+			buf = ensure_bufsz(buf, &bufsz, payload_size);
+			ENTRY *p;
+			char key[KEYSIZE];
+
+			snprintf(key, KEYSIZE, "%llu,%llu",
+			    (u_longlong_t)drrw->drr_object,
+			    (u_longlong_t)drrw->drr_offset);
+			ENTRY e = {.key = key};
+
+			p = hsearch(e, FIND);
+			if (p != NULL) {
+				zio_decompress_func_t *xfunc = NULL;
+				switch ((enum zio_compress)(intptr_t)p->data) {
+				case ZIO_COMPRESS_LZJB:
+					xfunc = lzjb_decompress;
+					break;
+				case ZIO_COMPRESS_GZIP_1:
+					xfunc = gzip_decompress;
+					break;
+				case ZIO_COMPRESS_ZLE:
+					xfunc = zle_decompress;
+					break;
+				case ZIO_COMPRESS_LZ4:
+					xfunc = lz4_decompress_zfs;
+					break;
+				case ZIO_COMPRESS_ZSTD:
+					xfunc = zfs_zstd_decompress;
+					break;
+				default:
+					assert(B_FALSE);
+				}
+				assert(xfunc != NULL);
+
+				/*
+				 * Read and decompress the block
+				 */
+				char *lzbuf = safe_calloc(payload_size);
+				(void) sfread(lzbuf, payload_size, stdin);
+				if (0 != xfunc(lzbuf, buf,
+				    payload_size, payload_size, 0)) {
+					/*
+					 * The block must not be compressed,
+					 * possibly because it gets written
+					 * multiple times in this stream.
+					 */
+					warnx("decompression failed for "
+					    "ino %llu offset %llu",
+					    (u_longlong_t)drrw->drr_object,
+					    (u_longlong_t)drrw->drr_offset);
+					memcpy(buf, lzbuf, payload_size);
+				} else if (verbose) {
+					fprintf(stderr, "successfully "
+					    "decompressed ino %llu "
+					    "offset %llu\n",
+					    (u_longlong_t)drrw->drr_object,
+					    (u_longlong_t)drrw->drr_offset);
+				}
+				free(lzbuf);
+			} else {
+				/*
+				 * Read the contents of the block unaltered
+				 */
+				(void) sfread(buf, payload_size, stdin);
+			}
+			break;
+		}
+
+		case DRR_WRITE_EMBEDDED:
+		{
+			struct drr_write_embedded *drrwe =
+			    &drr->drr_u.drr_write_embedded;
+			payload_size =
+			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
+			buf = ensure_bufsz(buf, &bufsz, payload_size);
+			(void) sfread(buf, payload_size, stdin);
+			break;
+		}
+
+		case DRR_FREEOBJECTS:
+		case DRR_FREE:
+		case DRR_OBJECT_RANGE:
+		case DRR_REDACT:
+			/* These record types carry no payload. */
+			break;
+
+		default:
+			/* Malformed input; must not depend on assert(). */
+			(void) fprintf(stderr, "INVALID record type 0x%x\n",
+			    drr->drr_type);
+			exit(1);
+		}
+
+		/*
+		 * A truncated payload leaves stdin at end-of-file. The
+		 * stream being read is stdin, so that is what is checked.
+		 */
+		if (feof(stdin)) {
+			fprintf(stderr, "Error: unexpected end-of-file\n");
+			exit(1);
+		}
+		if (ferror(stdin)) {
+			fprintf(stderr, "Error while reading file: %s\n",
+			    strerror(errno));
+			exit(1);
+		}
+
+		/*
+		 * We need to recalculate the checksum, and it needs to be
+		 * initially zero to do that. BEGIN records don't have
+		 * a checksum.
+		 */
+		if (drr->drr_type != DRR_BEGIN) {
+			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
+			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
+		}
+		error = dump_record(drr, buf, (size_t)payload_size,
+		    &stream_cksum, STDOUT_FILENO);
+		if (error != 0) {
+			warnx("failed to write stream: %s", strerror(error));
+			ret = 1;
+			break;
+		}
+		if (drr->drr_type == DRR_END) {
+			/*
+			 * Typically the END record is either the last
+			 * thing in the stream, or it is followed
+			 * by a BEGIN record (which also zeros the checksum).
+			 * However, a stream package ends with two END
+			 * records. The last END record's checksum starts
+			 * from zero.
+			 */
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+		}
+	}
+	free(buf);
+	fletcher_4_fini();
+	hdestroy();
+
+	return (ret);
+}
diff --git a/cmd/zstream/zstream_dump.c b/cmd/zstream/zstream_dump.c
index 977256cae400..170d84fed092 100644
--- a/cmd/zstream/zstream_dump.c
+++ b/cmd/zstream/zstream_dump.c
@@ -1,812 +1,812 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  *
  * Portions Copyright 2012 Martin Matuska
  */
 
 /*
  * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
*/ #include #include #include #include #include #include #include #include #include #include #include #include "zstream.h" /* * If dump mode is enabled, the number of bytes to print per line */ #define BYTES_PER_LINE 16 /* * If dump mode is enabled, the number of bytes to group together, separated * by newlines or spaces */ #define DUMP_GROUPING 4 uint64_t total_stream_len = 0; FILE *send_stream = 0; boolean_t do_byteswap = B_FALSE; boolean_t do_cksum = B_TRUE; -static void * +void * safe_malloc(size_t size) { void *rv = malloc(size); if (rv == NULL) { (void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n", size); abort(); } return (rv); } /* * ssread - send stream read. * * Read while computing incremental checksum */ static size_t ssread(void *buf, size_t len, zio_cksum_t *cksum) { size_t outlen; if ((outlen = fread(buf, len, 1, send_stream)) == 0) return (0); if (do_cksum) { if (do_byteswap) fletcher_4_incremental_byteswap(buf, len, cksum); else fletcher_4_incremental_native(buf, len, cksum); } total_stream_len += len; return (outlen); } static size_t read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) { ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum); if (r == 0) return (0); zio_cksum_t saved_cksum = *cksum; r = ssread(&drr->drr_u.drr_checksum.drr_checksum, sizeof (zio_cksum_t), cksum); if (r == 0) return (0); if (do_cksum && !ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) && !ZIO_CHECKSUM_EQUAL(saved_cksum, drr->drr_u.drr_checksum.drr_checksum)) { fprintf(stderr, "invalid checksum\n"); (void) printf("Incorrect checksum in record header.\n"); (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n", (longlong_t)saved_cksum.zc_word[0], (longlong_t)saved_cksum.zc_word[1], (longlong_t)saved_cksum.zc_word[2], (longlong_t)saved_cksum.zc_word[3]); return (0); } return (sizeof (*drr)); } /* * 
Print part of a block in ASCII characters */ static void print_ascii_block(char *subbuf, int length) { int i; for (i = 0; i < length; i++) { char char_print = isprint(subbuf[i]) ? subbuf[i] : '.'; if (i != 0 && i % DUMP_GROUPING == 0) { (void) printf(" "); } (void) printf("%c", char_print); } (void) printf("\n"); } /* * print_block - Dump the contents of a modified block to STDOUT * * Assume that buf has capacity evenly divisible by BYTES_PER_LINE */ static void print_block(char *buf, int length) { int i; /* * Start printing ASCII characters at a constant offset, after * the hex prints. Leave 3 characters per byte on a line (2 digit * hex number plus 1 space) plus spaces between characters and * groupings. */ int ascii_start = BYTES_PER_LINE * 3 + BYTES_PER_LINE / DUMP_GROUPING + 2; for (i = 0; i < length; i += BYTES_PER_LINE) { int j; int this_line_length = MIN(BYTES_PER_LINE, length - i); int print_offset = 0; for (j = 0; j < this_line_length; j++) { int buf_offset = i + j; /* * Separate every DUMP_GROUPING bytes by a space. */ if (buf_offset % DUMP_GROUPING == 0) { print_offset += printf(" "); } /* * Print the two-digit hex value for this byte. */ unsigned char hex_print = buf[buf_offset]; print_offset += printf("%02x ", hex_print); } (void) printf("%*s", ascii_start - print_offset, " "); print_ascii_block(buf + i, this_line_length); } } /* * Print an array of bytes to stdout as hexadecimal characters. str must * have buf_len * 2 + 1 bytes of space. 
*/ static void sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len) { int i, n; for (i = 0; i < buf_len; i++) { n = sprintf(str, "%02x", buf[i] & 0xff); str += n; } str[0] = '\0'; } int zstream_do_dump(int argc, char *argv[]) { char *buf = safe_malloc(SPA_MAXBLOCKSIZE); uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; uint64_t total_payload_size = 0; uint64_t total_overhead_size = 0; uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 }; char salt[ZIO_DATA_SALT_LEN * 2 + 1]; char iv[ZIO_DATA_IV_LEN * 2 + 1]; char mac[ZIO_DATA_MAC_LEN * 2 + 1]; uint64_t total_records = 0; uint64_t payload_size; dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; struct drr_end *drre = &thedrr.drr_u.drr_end; struct drr_object *drro = &thedrr.drr_u.drr_object; struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects; struct drr_write *drrw = &thedrr.drr_u.drr_write; struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref; struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; struct drr_object_range *drror = &thedrr.drr_u.drr_object_range; struct drr_redact *drrr = &thedrr.drr_u.drr_redact; struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; int c; boolean_t verbose = B_FALSE; boolean_t very_verbose = B_FALSE; boolean_t first = B_TRUE; /* * dump flag controls whether the contents of any modified data blocks * are printed to the console during processing of the stream. Warning: * for large streams, this can obviously lead to massive prints. 
*/ boolean_t dump = B_FALSE; int err; zio_cksum_t zc = { { 0 } }; zio_cksum_t pcksum = { { 0 } }; while ((c = getopt(argc, argv, ":vCd")) != -1) { switch (c) { case 'C': do_cksum = B_FALSE; break; case 'v': if (verbose) very_verbose = B_TRUE; verbose = B_TRUE; break; case 'd': dump = B_TRUE; verbose = B_TRUE; very_verbose = B_TRUE; break; case ':': (void) fprintf(stderr, "missing argument for '%c' option\n", optopt); zstream_usage(); break; case '?': (void) fprintf(stderr, "invalid option '%c'\n", optopt); zstream_usage(); break; } } if (argc > optind) { const char *filename = argv[optind]; send_stream = fopen(filename, "r"); if (send_stream == NULL) { (void) fprintf(stderr, "Error while opening file '%s': %s\n", filename, strerror(errno)); exit(1); } } else { if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, "Error: The send stream is a binary format " "and can not be read from a\n" "terminal. Standard input must be redirected, " "or a file must be\n" "specified as a command-line argument.\n"); exit(1); } send_stream = stdin; } fletcher_4_init(); while (read_hdr(drr, &zc)) { uint64_t featureflags = 0; /* * If this is the first DMU record being processed, check for * the magic bytes and figure out the endian-ness based on them. */ if (first) { if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { do_byteswap = B_TRUE; if (do_cksum) { ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); /* * recalculate header checksum now * that we know it needs to be * byteswapped. 
*/ fletcher_4_incremental_byteswap(drr, sizeof (dmu_replay_record_t), &zc); } } else if (drrb->drr_magic != DMU_BACKUP_MAGIC) { (void) fprintf(stderr, "Invalid stream " "(bad magic number)\n"); exit(1); } first = B_FALSE; } if (do_byteswap) { drr->drr_type = BSWAP_32(drr->drr_type); drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); } /* * At this point, the leading fields of the replay record * (drr_type and drr_payloadlen) have been byte-swapped if * necessary, but the rest of the data structure (the * union of type-specific structures) is still in its * original state. */ if (drr->drr_type >= DRR_NUMTYPES) { (void) printf("INVALID record found: type 0x%x\n", drr->drr_type); (void) printf("Aborting.\n"); exit(1); } drr_record_count[drr->drr_type]++; total_overhead_size += sizeof (*drr); total_records++; payload_size = 0; switch (drr->drr_type) { case DRR_BEGIN: if (do_byteswap) { drrb->drr_magic = BSWAP_64(drrb->drr_magic); drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); drrb->drr_type = BSWAP_32(drrb->drr_type); drrb->drr_flags = BSWAP_32(drrb->drr_flags); drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); (void) printf("BEGIN record\n"); (void) printf("\thdrtype = %lld\n", DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo)); (void) printf("\tfeatures = %llx\n", DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo)); (void) printf("\tmagic = %llx\n", (u_longlong_t)drrb->drr_magic); (void) printf("\tcreation_time = %llx\n", (u_longlong_t)drrb->drr_creation_time); (void) printf("\ttype = %u\n", drrb->drr_type); (void) printf("\tflags = 0x%x\n", drrb->drr_flags); (void) printf("\ttoguid = %llx\n", (u_longlong_t)drrb->drr_toguid); (void) printf("\tfromguid = %llx\n", (u_longlong_t)drrb->drr_fromguid); (void) printf("\ttoname = %s\n", drrb->drr_toname); (void) printf("\tpayloadlen = %u\n", 
drr->drr_payloadlen); if (verbose) (void) printf("\n"); if (drr->drr_payloadlen != 0) { nvlist_t *nv; int sz = drr->drr_payloadlen; if (sz > SPA_MAXBLOCKSIZE) { free(buf); buf = safe_malloc(sz); } (void) ssread(buf, sz, &zc); if (ferror(send_stream)) perror("fread"); err = nvlist_unpack(buf, sz, &nv, 0); if (err) { perror(strerror(err)); } else { nvlist_print(stdout, nv); nvlist_free(nv); } payload_size = sz; } break; case DRR_END: if (do_byteswap) { drre->drr_checksum.zc_word[0] = BSWAP_64(drre->drr_checksum.zc_word[0]); drre->drr_checksum.zc_word[1] = BSWAP_64(drre->drr_checksum.zc_word[1]); drre->drr_checksum.zc_word[2] = BSWAP_64(drre->drr_checksum.zc_word[2]); drre->drr_checksum.zc_word[3] = BSWAP_64(drre->drr_checksum.zc_word[3]); } /* * We compare against the *previous* checksum * value, because the stored checksum is of * everything before the DRR_END record. */ if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum, pcksum)) { (void) printf("Expected checksum differs from " "checksum in stream.\n"); (void) printf("Expected checksum = " "%llx/%llx/%llx/%llx\n", (long long unsigned int)pcksum.zc_word[0], (long long unsigned int)pcksum.zc_word[1], (long long unsigned int)pcksum.zc_word[2], (long long unsigned int)pcksum.zc_word[3]); } (void) printf("END checksum = %llx/%llx/%llx/%llx\n", (long long unsigned int) drre->drr_checksum.zc_word[0], (long long unsigned int) drre->drr_checksum.zc_word[1], (long long unsigned int) drre->drr_checksum.zc_word[2], (long long unsigned int) drre->drr_checksum.zc_word[3]); ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); break; case DRR_OBJECT: if (do_byteswap) { drro->drr_object = BSWAP_64(drro->drr_object); drro->drr_type = BSWAP_32(drro->drr_type); drro->drr_bonustype = BSWAP_32(drro->drr_bonustype); drro->drr_blksz = BSWAP_32(drro->drr_blksz); drro->drr_bonuslen = BSWAP_32(drro->drr_bonuslen); drro->drr_raw_bonuslen = BSWAP_32(drro->drr_raw_bonuslen); drro->drr_toguid = BSWAP_64(drro->drr_toguid); drro->drr_maxblkid = 
BSWAP_64(drro->drr_maxblkid); } if (featureflags & DMU_BACKUP_FEATURE_RAW && drro->drr_bonuslen > drro->drr_raw_bonuslen) { (void) fprintf(stderr, "Warning: Object %llu has bonuslen = " "%u > raw_bonuslen = %u\n\n", (u_longlong_t)drro->drr_object, drro->drr_bonuslen, drro->drr_raw_bonuslen); } payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); if (verbose) { (void) printf("OBJECT object = %llu type = %u " "bonustype = %u blksz = %u bonuslen = %u " "dn_slots = %u raw_bonuslen = %u " "flags = %u maxblkid = %llu " "indblkshift = %u nlevels = %u " "nblkptr = %u\n", (u_longlong_t)drro->drr_object, drro->drr_type, drro->drr_bonustype, drro->drr_blksz, drro->drr_bonuslen, drro->drr_dn_slots, drro->drr_raw_bonuslen, drro->drr_flags, (u_longlong_t)drro->drr_maxblkid, drro->drr_indblkshift, drro->drr_nlevels, drro->drr_nblkptr); } if (drro->drr_bonuslen > 0) { (void) ssread(buf, payload_size, &zc); if (dump) print_block(buf, payload_size); } break; case DRR_FREEOBJECTS: if (do_byteswap) { drrfo->drr_firstobj = BSWAP_64(drrfo->drr_firstobj); drrfo->drr_numobjs = BSWAP_64(drrfo->drr_numobjs); drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid); } if (verbose) { (void) printf("FREEOBJECTS firstobj = %llu " "numobjs = %llu\n", (u_longlong_t)drrfo->drr_firstobj, (u_longlong_t)drrfo->drr_numobjs); } break; case DRR_WRITE: if (do_byteswap) { drrw->drr_object = BSWAP_64(drrw->drr_object); drrw->drr_type = BSWAP_32(drrw->drr_type); drrw->drr_offset = BSWAP_64(drrw->drr_offset); drrw->drr_logical_size = BSWAP_64(drrw->drr_logical_size); drrw->drr_toguid = BSWAP_64(drrw->drr_toguid); drrw->drr_key.ddk_prop = BSWAP_64(drrw->drr_key.ddk_prop); drrw->drr_compressed_size = BSWAP_64(drrw->drr_compressed_size); } payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); /* * If this is verbose and/or dump output, * print info on the modified block */ if (verbose) { sprintf_bytes(salt, drrw->drr_salt, ZIO_DATA_SALT_LEN); sprintf_bytes(iv, drrw->drr_iv, ZIO_DATA_IV_LEN); sprintf_bytes(mac, drrw->drr_mac, 
ZIO_DATA_MAC_LEN); (void) printf("WRITE object = %llu type = %u " "checksum type = %u compression type = %u " "flags = %u offset = %llu " "logical_size = %llu " "compressed_size = %llu " "payload_size = %llu props = %llx " "salt = %s iv = %s mac = %s\n", (u_longlong_t)drrw->drr_object, drrw->drr_type, drrw->drr_checksumtype, drrw->drr_compressiontype, drrw->drr_flags, (u_longlong_t)drrw->drr_offset, (u_longlong_t)drrw->drr_logical_size, (u_longlong_t)drrw->drr_compressed_size, (u_longlong_t)payload_size, (u_longlong_t)drrw->drr_key.ddk_prop, salt, iv, mac); } /* * Read the contents of the block in from STDIN to buf */ (void) ssread(buf, payload_size, &zc); /* * If in dump mode */ if (dump) { print_block(buf, payload_size); } break; case DRR_WRITE_BYREF: if (do_byteswap) { drrwbr->drr_object = BSWAP_64(drrwbr->drr_object); drrwbr->drr_offset = BSWAP_64(drrwbr->drr_offset); drrwbr->drr_length = BSWAP_64(drrwbr->drr_length); drrwbr->drr_toguid = BSWAP_64(drrwbr->drr_toguid); drrwbr->drr_refguid = BSWAP_64(drrwbr->drr_refguid); drrwbr->drr_refobject = BSWAP_64(drrwbr->drr_refobject); drrwbr->drr_refoffset = BSWAP_64(drrwbr->drr_refoffset); drrwbr->drr_key.ddk_prop = BSWAP_64(drrwbr->drr_key.ddk_prop); } if (verbose) { (void) printf("WRITE_BYREF object = %llu " "checksum type = %u props = %llx " "offset = %llu length = %llu " "toguid = %llx refguid = %llx " "refobject = %llu refoffset = %llu\n", (u_longlong_t)drrwbr->drr_object, drrwbr->drr_checksumtype, (u_longlong_t)drrwbr->drr_key.ddk_prop, (u_longlong_t)drrwbr->drr_offset, (u_longlong_t)drrwbr->drr_length, (u_longlong_t)drrwbr->drr_toguid, (u_longlong_t)drrwbr->drr_refguid, (u_longlong_t)drrwbr->drr_refobject, (u_longlong_t)drrwbr->drr_refoffset); } break; case DRR_FREE: if (do_byteswap) { drrf->drr_object = BSWAP_64(drrf->drr_object); drrf->drr_offset = BSWAP_64(drrf->drr_offset); drrf->drr_length = BSWAP_64(drrf->drr_length); } if (verbose) { (void) printf("FREE object = %llu " "offset = %llu length = %lld\n", 
(u_longlong_t)drrf->drr_object, (u_longlong_t)drrf->drr_offset, (longlong_t)drrf->drr_length); } break; case DRR_SPILL: if (do_byteswap) { drrs->drr_object = BSWAP_64(drrs->drr_object); drrs->drr_length = BSWAP_64(drrs->drr_length); drrs->drr_compressed_size = BSWAP_64(drrs->drr_compressed_size); drrs->drr_type = BSWAP_32(drrs->drr_type); } payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); if (verbose) { sprintf_bytes(salt, drrs->drr_salt, ZIO_DATA_SALT_LEN); sprintf_bytes(iv, drrs->drr_iv, ZIO_DATA_IV_LEN); sprintf_bytes(mac, drrs->drr_mac, ZIO_DATA_MAC_LEN); (void) printf("SPILL block for object = %llu " "length = %llu flags = %u " "compression type = %u " "compressed_size = %llu " "payload_size = %llu " "salt = %s iv = %s mac = %s\n", (u_longlong_t)drrs->drr_object, (u_longlong_t)drrs->drr_length, drrs->drr_flags, drrs->drr_compressiontype, (u_longlong_t)drrs->drr_compressed_size, (u_longlong_t)payload_size, salt, iv, mac); } (void) ssread(buf, payload_size, &zc); if (dump) { print_block(buf, payload_size); } break; case DRR_WRITE_EMBEDDED: if (do_byteswap) { drrwe->drr_object = BSWAP_64(drrwe->drr_object); drrwe->drr_offset = BSWAP_64(drrwe->drr_offset); drrwe->drr_length = BSWAP_64(drrwe->drr_length); drrwe->drr_toguid = BSWAP_64(drrwe->drr_toguid); drrwe->drr_lsize = BSWAP_32(drrwe->drr_lsize); drrwe->drr_psize = BSWAP_32(drrwe->drr_psize); } if (verbose) { (void) printf("WRITE_EMBEDDED object = %llu " "offset = %llu length = %llu " "toguid = %llx comp = %u etype = %u " "lsize = %u psize = %u\n", (u_longlong_t)drrwe->drr_object, (u_longlong_t)drrwe->drr_offset, (u_longlong_t)drrwe->drr_length, (u_longlong_t)drrwe->drr_toguid, drrwe->drr_compression, drrwe->drr_etype, drrwe->drr_lsize, drrwe->drr_psize); } (void) ssread(buf, P2ROUNDUP(drrwe->drr_psize, 8), &zc); if (dump) { print_block(buf, P2ROUNDUP(drrwe->drr_psize, 8)); } payload_size = P2ROUNDUP(drrwe->drr_psize, 8); break; case DRR_OBJECT_RANGE: if (do_byteswap) { drror->drr_firstobj = 
BSWAP_64(drror->drr_firstobj); drror->drr_numslots = BSWAP_64(drror->drr_numslots); drror->drr_toguid = BSWAP_64(drror->drr_toguid); } if (verbose) { sprintf_bytes(salt, drror->drr_salt, ZIO_DATA_SALT_LEN); sprintf_bytes(iv, drror->drr_iv, ZIO_DATA_IV_LEN); sprintf_bytes(mac, drror->drr_mac, ZIO_DATA_MAC_LEN); (void) printf("OBJECT_RANGE firstobj = %llu " "numslots = %llu flags = %u " "salt = %s iv = %s mac = %s\n", (u_longlong_t)drror->drr_firstobj, (u_longlong_t)drror->drr_numslots, drror->drr_flags, salt, iv, mac); } break; case DRR_REDACT: if (do_byteswap) { drrr->drr_object = BSWAP_64(drrr->drr_object); drrr->drr_offset = BSWAP_64(drrr->drr_offset); drrr->drr_length = BSWAP_64(drrr->drr_length); drrr->drr_toguid = BSWAP_64(drrr->drr_toguid); } if (verbose) { (void) printf("REDACT object = %llu offset = " "%llu length = %llu\n", (u_longlong_t)drrr->drr_object, (u_longlong_t)drrr->drr_offset, (u_longlong_t)drrr->drr_length); } break; case DRR_NUMTYPES: /* should never be reached */ exit(1); } if (drr->drr_type != DRR_BEGIN && very_verbose) { (void) printf(" checksum = %llx/%llx/%llx/%llx\n", (longlong_t)drrc->drr_checksum.zc_word[0], (longlong_t)drrc->drr_checksum.zc_word[1], (longlong_t)drrc->drr_checksum.zc_word[2], (longlong_t)drrc->drr_checksum.zc_word[3]); } pcksum = zc; drr_byte_count[drr->drr_type] += payload_size; total_payload_size += payload_size; } free(buf); fletcher_4_fini(); /* Print final summary */ (void) printf("SUMMARY:\n"); (void) printf("\tTotal DRR_BEGIN records = %lld (%llu bytes)\n", (u_longlong_t)drr_record_count[DRR_BEGIN], (u_longlong_t)drr_byte_count[DRR_BEGIN]); (void) printf("\tTotal DRR_END records = %lld (%llu bytes)\n", (u_longlong_t)drr_record_count[DRR_END], (u_longlong_t)drr_byte_count[DRR_END]); (void) printf("\tTotal DRR_OBJECT records = %lld (%llu bytes)\n", (u_longlong_t)drr_record_count[DRR_OBJECT], (u_longlong_t)drr_byte_count[DRR_OBJECT]); (void) printf("\tTotal DRR_FREEOBJECTS records = %lld (%llu bytes)\n", 
(u_longlong_t)drr_record_count[DRR_FREEOBJECTS], (u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]); (void) printf("\tTotal DRR_WRITE records = %lld (%llu bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE], (u_longlong_t)drr_byte_count[DRR_WRITE]); (void) printf("\tTotal DRR_WRITE_BYREF records = %lld (%llu bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_BYREF], (u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]); (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld (%llu " "bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED], (u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]); (void) printf("\tTotal DRR_FREE records = %lld (%llu bytes)\n", (u_longlong_t)drr_record_count[DRR_FREE], (u_longlong_t)drr_byte_count[DRR_FREE]); (void) printf("\tTotal DRR_SPILL records = %lld (%llu bytes)\n", (u_longlong_t)drr_record_count[DRR_SPILL], (u_longlong_t)drr_byte_count[DRR_SPILL]); (void) printf("\tTotal records = %lld\n", (u_longlong_t)total_records); (void) printf("\tTotal payload size = %lld (0x%llx)\n", (u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size); (void) printf("\tTotal header overhead = %lld (0x%llx)\n", (u_longlong_t)total_overhead_size, (u_longlong_t)total_overhead_size); (void) printf("\tTotal stream length = %lld (0x%llx)\n", (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len); return (0); } diff --git a/cmd/zstream/zstream_redup.c b/cmd/zstream/zstream_redup.c index 20aff17ae652..5807fabcecb5 100644 --- a/cmd/zstream/zstream_redup.c +++ b/cmd/zstream/zstream_redup.c @@ -1,469 +1,469 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. 
* * CDDL HEADER END */ /* * Copyright (c) 2020 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_fletcher.h" #include "zstream.h" #define MAX_RDT_PHYSMEM_PERCENT 20 #define SMALLEST_POSSIBLE_MAX_RDT_MB 128 typedef struct redup_entry { struct redup_entry *rde_next; uint64_t rde_guid; uint64_t rde_object; uint64_t rde_offset; uint64_t rde_stream_offset; } redup_entry_t; typedef struct redup_table { redup_entry_t **redup_hash_array; umem_cache_t *ddecache; uint64_t ddt_count; int numhashbits; } redup_table_t; int highbit64(uint64_t i) { if (i == 0) return (0); return (NBBY * sizeof (uint64_t) - __builtin_clzll(i)); } -static void * +void * safe_calloc(size_t n) { void *rv = calloc(1, n); if (rv == NULL) { fprintf(stderr, "Error: could not allocate %u bytes of memory\n", (int)n); exit(1); } return (rv); } /* * Safe version of fread(), exits on error. */ -static int +int sfread(void *buf, size_t size, FILE *fp) { int rv = fread(buf, size, 1, fp); if (rv == 0 && ferror(fp)) { (void) fprintf(stderr, "Error while reading file: %s\n", strerror(errno)); exit(1); } return (rv); } /* * Safe version of pread(), exits on error. 
*/ static void spread(int fd, void *buf, size_t count, off_t offset) { ssize_t err = pread(fd, buf, count, offset); if (err == -1) { (void) fprintf(stderr, "Error while reading file: %s\n", strerror(errno)); exit(1); } else if (err != count) { (void) fprintf(stderr, "Error while reading file: short read\n"); exit(1); } } static int dump_record(dmu_replay_record_t *drr, void *payload, int payload_len, zio_cksum_t *zc, int outfd) { assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum) == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); fletcher_4_incremental_native(drr, offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc); if (drr->drr_type != DRR_BEGIN) { assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u. drr_checksum.drr_checksum)); drr->drr_u.drr_checksum.drr_checksum = *zc; } fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum, sizeof (zio_cksum_t), zc); if (write(outfd, drr, sizeof (*drr)) == -1) return (errno); if (payload_len != 0) { fletcher_4_incremental_native(payload, payload_len, zc); if (write(outfd, payload, payload_len) == -1) return (errno); } return (0); } static void rdt_insert(redup_table_t *rdt, uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset) { uint64_t ch = cityhash4(guid, object, offset, 0); uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits); redup_entry_t **rdepp; rdepp = &(rdt->redup_hash_array[hashcode]); redup_entry_t *rde = umem_cache_alloc(rdt->ddecache, UMEM_NOFAIL); rde->rde_next = *rdepp; rde->rde_guid = guid; rde->rde_object = object; rde->rde_offset = offset; rde->rde_stream_offset = stream_offset; *rdepp = rde; rdt->ddt_count++; } static void rdt_lookup(redup_table_t *rdt, uint64_t guid, uint64_t object, uint64_t offset, uint64_t *stream_offsetp) { uint64_t ch = cityhash4(guid, object, offset, 0); uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits); for (redup_entry_t *rde = rdt->redup_hash_array[hashcode]; rde != NULL; rde = rde->rde_next) { if (rde->rde_guid == 
guid && rde->rde_object == object && rde->rde_offset == offset) { *stream_offsetp = rde->rde_stream_offset; return; } } assert(!"could not find expected redup table entry"); } /* * Convert a dedup stream (generated by "zfs send -D") to a * non-deduplicated stream. The entire infd will be converted, including * any substreams in a stream package (generated by "zfs send -RD"). The * infd must be seekable. */ static void zfs_redup_stream(int infd, int outfd, boolean_t verbose) { int bufsz = SPA_MAXBLOCKSIZE; dmu_replay_record_t thedrr = { 0 }; dmu_replay_record_t *drr = &thedrr; redup_table_t rdt; zio_cksum_t stream_cksum; uint64_t numbuckets; uint64_t num_records = 0; uint64_t num_write_byref_records = 0; #ifdef _ILP32 uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20; #else uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); uint64_t max_rde_size = MAX((physmem * MAX_RDT_PHYSMEM_PERCENT) / 100, SMALLEST_POSSIBLE_MAX_RDT_MB << 20); #endif numbuckets = max_rde_size / (sizeof (redup_entry_t)); /* * numbuckets must be a power of 2. Increase number to * a power of 2 if necessary. */ if (!ISP2(numbuckets)) numbuckets = 1ULL << highbit64(numbuckets); rdt.redup_hash_array = safe_calloc(numbuckets * sizeof (redup_entry_t *)); rdt.ddecache = umem_cache_create("rde", sizeof (redup_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); rdt.numhashbits = highbit64(numbuckets) - 1; rdt.ddt_count = 0; char *buf = safe_calloc(bufsz); FILE *ofp = fdopen(infd, "r"); long offset = ftell(ofp); while (sfread(drr, sizeof (*drr), ofp) != 0) { num_records++; /* * We need to regenerate the checksum. 
*/ if (drr->drr_type != DRR_BEGIN) { memset(&drr->drr_u.drr_checksum.drr_checksum, 0, sizeof (drr->drr_u.drr_checksum.drr_checksum)); } uint64_t payload_size = 0; switch (drr->drr_type) { case DRR_BEGIN: { struct drr_begin *drrb = &drr->drr_u.drr_begin; int fflags; ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); assert(drrb->drr_magic == DMU_BACKUP_MAGIC); /* clear the DEDUP feature flag for this stream */ fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); fflags &= ~(DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); /* cppcheck-suppress syntaxError */ DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); int sz = drr->drr_payloadlen; if (sz != 0) { if (sz > bufsz) { free(buf); buf = safe_calloc(sz); bufsz = sz; } (void) sfread(buf, sz, ofp); } payload_size = sz; break; } case DRR_END: { struct drr_end *drre = &drr->drr_u.drr_end; /* * Use the recalculated checksum, unless this is * the END record of a stream package, which has * no checksum. */ if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum)) drre->drr_checksum = stream_cksum; break; } case DRR_OBJECT: { struct drr_object *drro = &drr->drr_u.drr_object; if (drro->drr_bonuslen > 0) { payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); (void) sfread(buf, payload_size, ofp); } break; } case DRR_SPILL: { struct drr_spill *drrs = &drr->drr_u.drr_spill; payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); (void) sfread(buf, payload_size, ofp); break; } case DRR_WRITE_BYREF: { struct drr_write_byref drrwb = drr->drr_u.drr_write_byref; num_write_byref_records++; /* * Look up in hash table by drrwb->drr_refguid, * drr_refobject, drr_refoffset. Replace this * record with the found WRITE record, but with * drr_object,drr_offset,drr_toguid replaced with ours. 
*/ uint64_t stream_offset = 0; rdt_lookup(&rdt, drrwb.drr_refguid, drrwb.drr_refobject, drrwb.drr_refoffset, &stream_offset); spread(infd, drr, sizeof (*drr), stream_offset); assert(drr->drr_type == DRR_WRITE); struct drr_write *drrw = &drr->drr_u.drr_write; assert(drrw->drr_toguid == drrwb.drr_refguid); assert(drrw->drr_object == drrwb.drr_refobject); assert(drrw->drr_offset == drrwb.drr_refoffset); payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); spread(infd, buf, payload_size, stream_offset + sizeof (*drr)); drrw->drr_toguid = drrwb.drr_toguid; drrw->drr_object = drrwb.drr_object; drrw->drr_offset = drrwb.drr_offset; break; } case DRR_WRITE: { struct drr_write *drrw = &drr->drr_u.drr_write; payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); (void) sfread(buf, payload_size, ofp); rdt_insert(&rdt, drrw->drr_toguid, drrw->drr_object, drrw->drr_offset, offset); break; } case DRR_WRITE_EMBEDDED: { struct drr_write_embedded *drrwe = &drr->drr_u.drr_write_embedded; payload_size = P2ROUNDUP((uint64_t)drrwe->drr_psize, 8); (void) sfread(buf, payload_size, ofp); break; } case DRR_FREEOBJECTS: case DRR_FREE: case DRR_OBJECT_RANGE: break; default: (void) fprintf(stderr, "INVALID record type 0x%x\n", drr->drr_type); /* should never happen, so assert */ assert(B_FALSE); } if (feof(ofp)) { fprintf(stderr, "Error: unexpected end-of-file\n"); exit(1); } if (ferror(ofp)) { fprintf(stderr, "Error while reading file: %s\n", strerror(errno)); exit(1); } /* * We need to recalculate the checksum, and it needs to be * initially zero to do that. BEGIN records don't have * a checksum. */ if (drr->drr_type != DRR_BEGIN) { memset(&drr->drr_u.drr_checksum.drr_checksum, 0, sizeof (drr->drr_u.drr_checksum.drr_checksum)); } if (dump_record(drr, buf, payload_size, &stream_cksum, outfd) != 0) break; if (drr->drr_type == DRR_END) { /* * Typically the END record is either the last * thing in the stream, or it is followed * by a BEGIN record (which also zeros the checksum). 
* However, a stream package ends with two END * records. The last END record's checksum starts * from zero. */ ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); } offset = ftell(ofp); } if (verbose) { char mem_str[16]; zfs_nicenum(rdt.ddt_count * sizeof (redup_entry_t), mem_str, sizeof (mem_str)); fprintf(stderr, "converted stream with %llu total records, " "including %llu dedup records, using %sB memory.\n", (long long)num_records, (long long)num_write_byref_records, mem_str); } umem_cache_destroy(rdt.ddecache); free(rdt.redup_hash_array); free(buf); (void) fclose(ofp); } int zstream_do_redup(int argc, char *argv[]) { boolean_t verbose = B_FALSE; int c; while ((c = getopt(argc, argv, "v")) != -1) { switch (c) { case 'v': verbose = B_TRUE; break; case '?': (void) fprintf(stderr, "invalid option '%c'\n", optopt); zstream_usage(); break; } } argc -= optind; argv += optind; if (argc != 1) zstream_usage(); const char *filename = argv[0]; if (isatty(STDOUT_FILENO)) { (void) fprintf(stderr, "Error: Stream can not be written to a terminal.\n" "You must redirect standard output.\n"); return (1); } int fd = open(filename, O_RDONLY); if (fd == -1) { (void) fprintf(stderr, "Error while opening file '%s': %s\n", filename, strerror(errno)); exit(1); } fletcher_4_init(); zfs_redup_stream(fd, STDOUT_FILENO, verbose); fletcher_4_fini(); close(fd); return (0); } diff --git a/man/man8/zstream.8 b/man/man8/zstream.8 index c0322ee3ace0..aac7e3487712 100644 --- a/man/man8/zstream.8 +++ b/man/man8/zstream.8 @@ -1,117 +1,168 @@ .\" .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. .\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions .\" and limitations under the License. 
.\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. .\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" CDDL HEADER END .\" .\" Copyright (c) 2020 by Delphix. All rights reserved. .\" -.Dd May 8, 2021 +.Dd March 25, 2022 .Dt ZSTREAM 8 .Os . .Sh NAME .Nm zstream .Nd manipulate ZFS send streams .Sh SYNOPSIS .Nm .Cm dump .Op Fl Cvd .Op Ar file .Nm +.Cm decompress +.Op Fl v +.Op Ar object Ns Sy \&, Ns Ar offset Ns Op Sy \&, Ns Ar type Ns ... +.Nm .Cm redup .Op Fl v .Ar file .Nm .Cm token .Ar resume_token . .Sh DESCRIPTION The .Sy zstream utility manipulates ZFS send streams output by the .Sy zfs send command. .Bl -tag -width "" .It Xo .Nm .Cm dump .Op Fl Cvd .Op Ar file .Xc Print information about the specified send stream, including headers and record counts. The send stream may either be in the specified .Ar file , or provided on standard input. .Bl -tag -width "-D" .It Fl C Suppress the validation of checksums. .It Fl v Verbose. Print metadata for each record. .It Fl d Dump data contained in each record. Implies verbose. .El .Pp The .Nm zstreamdump alias is provided for compatibility and is equivalent to running .Nm .Cm dump . .It Xo .Nm .Cm token .Ar resume_token .Xc Dumps ZFS resume token information. .It Xo .Nm +.Cm decompress +.Op Fl v +.Op Ar object Ns Sy \&, Ns Ar offset Ns Op Sy \&, Ns Ar type Ns ... +.Xc +Decompress selected records in a ZFS send stream provided on standard input, +when the compression type recorded in ZFS metadata may be incorrect. +Specify the object number and byte offset of each record that you wish to +decompress. +Optionally specify the compression type. +Valid compression types include +.Sy gzip , +.Sy lz4 , +.Sy lzjb , +.Sy zstd , +and +.Sy zle . +The default is +.Sy lz4 . 
+Every record for that object beginning at that offset will be decompressed, if +possible. +It may not be possible, because the record may be corrupted in some but not +all of the stream's snapshots. +The repaired stream will be written to standard output. +.Bl -tag -width "-v" +.It Fl v +Verbose. +Print summary of decompressed records. +.El +.It Xo +.Nm .Cm redup .Op Fl v .Ar file .Xc Deduplicated send streams can be generated by using the .Nm zfs Cm send Fl D command. The ability to send deduplicated send streams is deprecated. In the future, the ability to receive a deduplicated send stream with .Nm zfs Cm receive will be removed. However, deduplicated send streams can still be received by utilizing .Nm zstream Cm redup . .Pp The .Nm zstream Cm redup command is provided a .Ar file containing a deduplicated send stream, and outputs an equivalent non-deduplicated send stream on standard output. Therefore, a deduplicated send stream can be received by running: .Dl # Nm zstream Cm redup Pa DEDUP_STREAM_FILE | Nm zfs Cm receive No … .Bl -tag -width "-D" .It Fl v Verbose. Print summary of converted records. .El .El . +.Sh EXAMPLES +Heal a dataset that was corrupted due to OpenZFS bug #12762. +First, determine which records are corrupt. +That cannot be done automatically; it requires information beyond ZFS's +metadata. +If object +.Sy 128 +is corrupted at offset +.Sy 0 +and is compressed using +.Sy lz4 , +then run this command: +.Bd -literal +.No # Nm zfs Ar send Fl c Ar … | Nm zstream decompress Ar 128,0,lz4 | \ +Nm zfs recv Ar … +.Ed .Sh SEE ALSO .Xr zfs 8 , .Xr zfs-receive 8 , -.Xr zfs-send 8 +.Xr zfs-send 8 , +.Lk https://github.com/openzfs/zfs/issues/12762