diff --git a/usr.bin/mkuzip/mkuz_lzma.h b/usr.bin/mkuzip/mkuz_lzma.h index abdcc996e4c3..de7633c395ca 100644 --- a/usr.bin/mkuzip/mkuz_lzma.h +++ b/usr.bin/mkuzip/mkuz_lzma.h @@ -1,41 +1,35 @@ /* * Copyright (c) 2004-2016 Maxim Sobolev * Copyright (c) 2011 Aleksandr Rybalko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ -/* Format L3.0, since we move to XZ API */ -#define CLOOP_MAGIC_LZMA \ - "#!/bin/sh\n" \ - "#L3.0\n" \ - "n=uncompress\n" \ - "m=geom_$n\n" \ - "(kldstat -m $m 2>&-||kldload $m)>&-&&" \ - "mount_cd9660 /dev/`mdconfig -af $0`.$n $1\n" \ - "exit $?\n" #define DEFAULT_SUFX_LZMA ".ulzma" +/* Format L3.0, since we move to XZ API */ +#define CLOOP_MAGIC_LZMA "#!/bin/sh\n#L3.0\n" + size_t mkuz_lzma_cbound(size_t); void *mkuz_lzma_init(int *); void mkuz_lzma_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuz_zlib.h b/usr.bin/mkuzip/mkuz_zlib.h index 88e68343a27d..5ecf77bb7284 100644 --- a/usr.bin/mkuzip/mkuz_zlib.h +++ b/usr.bin/mkuzip/mkuz_zlib.h @@ -1,35 +1,33 @@ /* * Copyright (c) 2004-2016 Maxim Sobolev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #define DEFAULT_SUFX_ZLIB ".uzip" -#define CLOOP_MAGIC_ZLIB "#!/bin/sh\n#V2.0 Format\n" \ - "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \ - "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n" +#define CLOOP_MAGIC_ZLIB "#!/bin/sh\n#V2.0 Format\n" size_t mkuz_zlib_cbound(size_t); void *mkuz_zlib_init(int *); void mkuz_zlib_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuz_zstd.h b/usr.bin/mkuzip/mkuz_zstd.h index b434afe61eee..ca62d3afd417 100644 --- a/usr.bin/mkuzip/mkuz_zstd.h +++ b/usr.bin/mkuzip/mkuz_zstd.h @@ -1,36 +1,34 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019 Conrad Meyer * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #define DEFAULT_SUFX_ZSTD ".uzst" -#define CLOOP_MAGIC_ZSTD "#!/bin/sh\n#Z4.0 Format\n" \ - "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \ - "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n" +#define CLOOP_MAGIC_ZSTD "#!/bin/sh\n#Z4.0 Format\n" size_t mkuz_zstd_cbound(size_t); void *mkuz_zstd_init(int *); void mkuz_zstd_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuzip.8 b/usr.bin/mkuzip/mkuzip.8 index 6f7495ce5418..3b3afc626633 100644 --- a/usr.bin/mkuzip/mkuzip.8 +++ b/usr.bin/mkuzip/mkuzip.8 @@ -1,261 +1,272 @@ .\"- .\" Copyright (c) 2004-2016 Maxim Sobolev .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd August 9, 2019 .Dt MKUZIP 8 .Os .Sh NAME .Nm mkuzip .Nd compress disk image for use with .Xr geom_uzip 4 class .Sh SYNOPSIS .Nm .Op Fl dSsvZ .Op Fl A Ar compression_algorithm .Op Fl C Ar compression_level .Op Fl j Ar compression_jobs .Op Fl o Ar outfile .Op Fl s Ar cluster_size .Ar infile .Sh DESCRIPTION The .Nm utility compresses a disk image file so that the .Xr geom_uzip 4 class will be able to decompress the resulting image at run-time. This allows for a significant reduction in the size of the disk image at the expense of some CPU time required to decompress the data each time it is read. The .Nm utility works in two phases: .Bl -enum .It An .Ar infile image is split into clusters; each cluster is compressed. .It The resulting set of compressed clusters is written to the output file. In addition, a .Dq table of contents header is written which allows for efficient seeking. .El .Pp The options are: .Bl -tag -width indent .It Fl A Op Ar lzma | Ar zlib | Ar zstd Select a specific compression algorithm. If this option is not provided, the default is .Ar zlib . .Pp The .Ar lzma algorithm provides noticeably better compression levels than zlib on the same data set. It has vastly slower compression speed and moderately slower decompression speed. .Pp The .Ar zstd algorithm provides better compression levels than zlib on the same data set. It also has faster compression and decompression speed than zlib.
In the very high compression .Dq level settings, it does not offer quite as high a compression ratio as .Ar lzma . However, its decompression speed does not suffer at high compression .Dq levels . .It Fl C Ar compression_level Select the integer compression level used to parameterize the chosen compression algorithm. .Pp For any given algorithm, a lesser number selects a faster compression mode. A greater number selects a slower compression mode. Typically, for the same algorithm, a greater .Ar compression_level provides a better final compression ratio. .Pp For .Ar lzma , the range of valid compression levels is .Va 0-9 . The .Nm default for lzma is .Va 6 . .Pp For .Ar zlib , the range of valid compression levels is .Va 1-9 . The .Nm default for zlib is .Va 9 . .Pp For .Ar zstd , the range of valid compression levels is currently .Va 1-19 . The .Nm default for zstd is .Va 9 . .It Fl d Enable de-duplication. When the option is enabled, .Nm detects identical blocks in the input and replaces each subsequent occurrence of such a block with a pointer to the very first one in the output. Setting this option results in a moderate decrease of compressed image size, typically around 3-5% of the final size of the compressed image. .It Fl j Ar compression_jobs Specify the number of compression jobs that .Nm runs in parallel to speed up compression. When this option is not specified, the number of jobs is set to be equal to the value of .Va hw.ncpu .Xr sysctl 8 variable. .It Op Fl L Legacy flag that indicates the same thing as .Dq Fl A Ar lzma . .It Fl o Ar outfile Name of the output file .Ar outfile . The default is to use the input name with the suffix .Pa .uzip for the .Xr zlib 3 compression, .Pa .ulzma for the .Xr lzma 3 compression, or .Pa .uzst for the .Xr zstd 1 compression. .It Fl S Print a summary of the compression ratio as well as the output file size after the file has been processed. .It Fl s Ar cluster_size Split the image into clusters of .Ar cluster_size bytes, 16384 bytes by default. The .Ar cluster_size should be a multiple of 512 bytes.
.It Fl v Display verbose messages. .It Fl Z Disable zero-block detection and elimination. When this option is set, .Nm compresses blocks of zero bytes just as it would any other block. When the option is not set, .Nm detects and compresses zero blocks in a space-efficient way. Setting .Fl Z increases compressed image sizes slightly, typically less than 0.1%. .El .Sh IMPLEMENTATION NOTES The compression ratio largely depends on the compression algorithm, level, and cluster size used. For large cluster sizes (16kB and higher), typical overall image compression ratios with .Xr zlib 3 are only 1-2% less than those achieved with .Xr gzip 1 over the entire image. However, it should be kept in mind that larger cluster sizes lead to higher overhead in the .Xr geom_uzip 4 class, as the class has to decompress the whole cluster even if only a few bytes from that cluster have to be read. .Pp Additionally, the threshold at 16-32 kB where a larger cluster size does not benefit overall compression ratio is an artifact of the .Xr zlib 3 algorithm in particular. .Ar Lzma and .Ar Zstd will continue to provide better compression ratios as cluster sizes are increased, at high enough compression levels. The same tradeoff continues to apply: reads in .Xr geom_uzip 4 become more expensive the greater the cluster size. .Pp -The -.Nm -utility -inserts a short shell script at the beginning of the generated image, -which makes it possible to -.Dq run -the image just like any other shell script. -The script tries to load the -.Xr geom_uzip 4 -class if it is not loaded, configure the image as an -.Xr md 4 -disk device using -.Xr mdconfig 8 , -and automatically mount it using -.Xr mount_cd9660 8 -on the mount point provided as the first argument to the script. -.Pp The de-duplication is a .Fx specific feature and, while it does not require any changes to the on-disk compressed image format, it did require some matching changes to .Xr geom_uzip 4 to handle the resulting images correctly.
.Pp To make use of .Ar zstd .Nm images, the kernel must be configured with .Cd ZSTDIO . It is enabled by default in many .Cd GENERIC kernels provided as binary distributions by .Fx . The status on any particular system can be verified by checking .Xr sysctl 8 .Dv kern.features.geom_uzip_zstd for .Dq 1 . .Sh EXIT STATUS .Ex -std +.Sh EXAMPLES +.Pp +The following describes how to create and mount a uzip image. +.Pp +Create a file system image: +.Bd -literal -offset indent +makefs /src.img /usr/src +.Ed +.Pp +Create the uzip image, the output file will be named src.img.uzip: +.Bd -literal -offset indent +mkuzip /src.img +.Ed +.Pp +Ensure geom_uzip is loaded: +.Bd -literal -offset indent +kldload geom_uzip +.Ed +.Pp +Create an MD device backed by the uzip image: +.Bd -literal -offset indent +mdconfig -f /src.img.uzip +.Ed +.Pp +Mount the uzip image: +.Bd -literal -offset indent +mount -o ro /dev/md0.uzip /mnt +.Ed .Sh SEE ALSO .Xr gzip 1 , .Xr xz 1 , .Xr zstd 1 , .Xr zlib 3 , .Xr geom 4 , .Xr geom_uzip 4 , .Xr md 4 , .Xr mdconfig 8 , .Xr mount_cd9660 8 .Sh AUTHORS .An Maxim Sobolev Aq Mt sobomax@FreeBSD.org diff --git a/usr.bin/mkuzip/mkuzip.c b/usr.bin/mkuzip/mkuzip.c index f627562af7df..5d2aa9a48a98 100644 --- a/usr.bin/mkuzip/mkuzip.c +++ b/usr.bin/mkuzip/mkuzip.c @@ -1,502 +1,502 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2004-2016 Maxim Sobolev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mkuzip.h" #include "mkuz_cloop.h" #include "mkuz_blockcache.h" #include "mkuz_lzma.h" #include "mkuz_zlib.h" #include "mkuz_zstd.h" #include "mkuz_blk.h" #include "mkuz_cfg.h" #include "mkuz_conveyor.h" #include "mkuz_format.h" #include "mkuz_fqueue.h" #include "mkuz_time.h" #include "mkuz_insize.h" #define DEFAULT_CLSTSIZE 16384 enum UZ_ALGORITHM { UZ_ZLIB = 0, UZ_LZMA, UZ_ZSTD, UZ_INVALID }; static const struct mkuz_format uzip_fmts[] = { [UZ_ZLIB] = { .option = "zlib", .magic = CLOOP_MAGIC_ZLIB, .default_sufx = DEFAULT_SUFX_ZLIB, .f_compress_bound = mkuz_zlib_cbound, .f_init = mkuz_zlib_init, .f_compress = mkuz_zlib_compress, }, [UZ_LZMA] = { .option = "lzma", .magic = CLOOP_MAGIC_LZMA, .default_sufx = DEFAULT_SUFX_LZMA, .f_compress_bound = mkuz_lzma_cbound, .f_init = mkuz_lzma_init, .f_compress = mkuz_lzma_compress, }, [UZ_ZSTD] = { .option = "zstd", .magic = CLOOP_MAGIC_ZSTD, .default_sufx = DEFAULT_SUFX_ZSTD, .f_compress_bound = mkuz_zstd_cbound, .f_init = mkuz_zstd_init, .f_compress = mkuz_zstd_compress, }, }; static struct mkuz_blk 
*readblock(int, u_int32_t); static void usage(void) __dead2; static void cleanup(void); static char *cleanfile = NULL; static int cmp_blkno(const struct mkuz_blk *bp, void *p) { uint32_t *ap; ap = (uint32_t *)p; return (bp->info.blkno == *ap); } int main(int argc, char **argv) { struct mkuz_cfg cfs; char *oname; uint64_t *toc; int i, io, opt, tmp; struct { int en; FILE *f; } summary; struct iovec iov[2]; uint64_t offset, last_offset; struct cloop_header hdr; struct mkuz_conveyor *cvp; struct mkuz_blk_info *chit; size_t ncpusz, ncpu, magiclen; double st, et; enum UZ_ALGORITHM comp_alg; int comp_level; st = getdtime(); ncpusz = sizeof(size_t); if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) { ncpu = 1; } else if (ncpu > MAX_WORKERS_AUTO) { ncpu = MAX_WORKERS_AUTO; } memset(&hdr, 0, sizeof(hdr)); cfs.blksz = DEFAULT_CLSTSIZE; oname = NULL; cfs.verbose = 0; cfs.no_zcomp = 0; cfs.en_dedup = 0; summary.en = 0; summary.f = stderr; comp_alg = UZ_ZLIB; comp_level = USE_DEFAULT_LEVEL; cfs.nworkers = ncpu; struct mkuz_blk *iblk, *oblk; while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) { switch(opt) { case 'A': for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) { if (strcmp(uzip_fmts[tmp].option, optarg) == 0) break; } if (tmp == UZ_INVALID) errx(1, "invalid algorithm specified: %s", optarg); /* Not reached */ comp_alg = tmp; break; case 'C': comp_level = atoi(optarg); break; case 'o': oname = optarg; break; case 's': tmp = atoi(optarg); if (tmp <= 0) { errx(1, "invalid cluster size specified: %s", optarg); /* Not reached */ } cfs.blksz = tmp; break; case 'v': cfs.verbose = 1; break; case 'Z': cfs.no_zcomp = 1; break; case 'd': cfs.en_dedup = 1; break; case 'L': comp_alg = UZ_LZMA; break; case 'S': summary.en = 1; summary.f = stdout; break; case 'j': tmp = atoi(optarg); if (tmp <= 0) { errx(1, "invalid number of compression threads" " specified: %s", optarg); /* Not reached */ } cfs.nworkers = tmp; break; default: usage(); /* Not reached */ } } argc -= optind; 
argv += optind; if (argc != 1) { usage(); /* Not reached */ } cfs.handler = &uzip_fmts[comp_alg]; magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); assert(magiclen < sizeof(hdr.magic)); if (cfs.en_dedup != 0) { /* * Dedupe requires a version 3 format. Don't downgrade newer * formats. */ if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2) hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; hdr.magic[CLOOP_OFS_COMPR] = tolower(hdr.magic[CLOOP_OFS_COMPR]); } if (cfs.blksz % DEV_BSIZE != 0) errx(1, "cluster size should be multiple of %d", DEV_BSIZE); cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz); if (cfs.cbound_blksz > MAXPHYS) errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu", cfs.cbound_blksz, (size_t)MAXPHYS); cfs.handler->f_init(&comp_level); cfs.comp_level = comp_level; cfs.iname = argv[0]; if (oname == NULL) { asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx); if (oname == NULL) { err(1, "can't allocate memory"); /* Not reached */ } } signal(SIGHUP, exit); signal(SIGINT, exit); signal(SIGTERM, exit); signal(SIGXCPU, exit); signal(SIGXFSZ, exit); atexit(cleanup); cfs.fdr = open(cfs.iname, O_RDONLY); if (cfs.fdr < 0) { err(1, "open(%s)", cfs.iname); /* Not reached */ } cfs.isize = mkuz_get_insize(&cfs); if (cfs.isize < 0) { errx(1, "can't determine input image size"); /* Not reached */ } hdr.nblocks = cfs.isize / cfs.blksz; if ((cfs.isize % cfs.blksz) != 0) { if (cfs.verbose != 0) fprintf(stderr, "file size is not multiple " "of %d, padding data\n", cfs.blksz); hdr.nblocks++; } toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); /* * Initialize last+1 entry with non-heap trash. If final padding is * added later, it may or may not be overwritten with an offset * representing the length of the final compressed block. If not, * initialize to a defined value. */ toc[hdr.nblocks] = 0; cfs.fdw = open(oname, (cfs.en_dedup ? 
O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, - S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (cfs.fdw < 0) { err(1, "open(%s)", oname); /* Not reached */ } cleanfile = oname; /* Prepare header that we will write later when we have index ready. */ iov[0].iov_base = (char *)&hdr; iov[0].iov_len = sizeof(hdr); iov[1].iov_base = (char *)toc; iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); offset = iov[0].iov_len + iov[1].iov_len; /* Reserve space for header */ lseek(cfs.fdw, offset, SEEK_SET); if (cfs.verbose != 0) { fprintf(stderr, "data size %ju bytes, number of clusters " "%u, index length %zu bytes\n", cfs.isize, hdr.nblocks, iov[1].iov_len); } cvp = mkuz_conveyor_ctor(&cfs); last_offset = 0; iblk = oblk = NULL; for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) { iblk = readblock(cfs.fdr, cfs.blksz); mkuz_fqueue_enq(cvp->wrk_queue, iblk); if (iblk != MKUZ_BLK_EOF && (i < (cfs.nworkers * ITEMS_PER_WORKER))) { continue; } drain: oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io); assert(oblk->info.blkno == (unsigned)io); oblk->info.offset = offset; chit = NULL; if (cfs.en_dedup != 0 && oblk->info.len > 0) { chit = mkuz_blkcache_regblock(cfs.fdw, oblk); /* * There should be at least one non-empty block * between us and the backref'ed offset, otherwise * we won't be able to parse that sequence correctly * as it would be indistinguishible from another * empty block. 
*/ if (chit != NULL && chit->offset == last_offset) { chit = NULL; } } if (chit != NULL) { toc[io] = htobe64(chit->offset); oblk->info.len = 0; } else { if (oblk->info.len > 0 && write(cfs.fdw, oblk->data, oblk->info.len) < 0) { err(1, "write(%s)", oname); /* Not reached */ } toc[io] = htobe64(offset); last_offset = offset; offset += oblk->info.len; } if (cfs.verbose != 0) { fprintf(stderr, "cluster #%d, in %u bytes, " "out len=%lu offset=%lu", io, cfs.blksz, (u_long)oblk->info.len, (u_long)be64toh(toc[io])); if (chit != NULL) { fprintf(stderr, " (backref'ed to #%d)", chit->blkno); } fprintf(stderr, "\n"); } free(oblk); io += 1; if (iblk == MKUZ_BLK_EOF) { if (io < i) goto drain; /* Last block, see if we need to add some padding */ if ((offset % DEV_BSIZE) == 0) continue; oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE)); oblk->info.blkno = io; oblk->info.len = oblk->alen; if (cfs.verbose != 0) { fprintf(stderr, "padding data with %lu bytes " "so that file size is multiple of %d\n", (u_long)oblk->alen, DEV_BSIZE); } mkuz_fqueue_enq(cvp->results, oblk); goto drain; } } close(cfs.fdr); if (cfs.verbose != 0 || summary.en != 0) { et = getdtime(); fprintf(summary.f, "compressed data to %ju bytes, saved %lld " "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset, (long long)(cfs.isize - offset), 100.0 * (long long)(cfs.isize - offset) / (float)cfs.isize, (float)cfs.isize / (et - st)); } /* Convert to big endian */ hdr.blksz = htonl(cfs.blksz); hdr.nblocks = htonl(hdr.nblocks); /* Write headers into pre-allocated space */ lseek(cfs.fdw, 0, SEEK_SET); if (writev(cfs.fdw, iov, 2) < 0) { err(1, "writev(%s)", oname); /* Not reached */ } cleanfile = NULL; close(cfs.fdw); exit(0); } static struct mkuz_blk * readblock(int fd, u_int32_t clstsize) { int numread; struct mkuz_blk *rval; static int blockcnt; off_t cpos; rval = mkuz_blk_ctor(clstsize); rval->info.blkno = blockcnt; blockcnt += 1; cpos = lseek(fd, 0, SEEK_CUR); if (cpos < 0) { err(1, "readblock: lseek() failed"); /* 
Not reached */ } rval->info.offset = cpos; numread = read(fd, rval->data, clstsize); if (numread < 0) { err(1, "readblock: read() failed"); /* Not reached */ } if (numread == 0) { free(rval); return MKUZ_BLK_EOF; } rval->info.len = numread; return rval; } static void usage(void) { fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " "[-j ncompr] infile\n"); exit(1); } void * mkuz_safe_malloc(size_t size) { void *retval; retval = malloc(size); if (retval == NULL) { err(1, "can't allocate memory"); /* Not reached */ } return retval; } void * mkuz_safe_zmalloc(size_t size) { void *retval; retval = mkuz_safe_malloc(size); bzero(retval, size); return retval; } static void cleanup(void) { if (cleanfile != NULL) unlink(cleanfile); } int mkuz_memvcmp(const void *memory, unsigned char val, size_t size) { const u_char *mm; mm = (const u_char *)memory; return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; }