Index: stable/12/usr.bin/gzip/gzip.c =================================================================== --- stable/12/usr.bin/gzip/gzip.c (revision 360185) +++ stable/12/usr.bin/gzip/gzip.c (revision 360186) @@ -1,2363 +1,2363 @@ /* $NetBSD: gzip.c,v 1.116 2018/10/27 11:39:12 skrll Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c) 1997, 1998, 2003, 2004, 2006, 2008, 2009, 2010, 2011, 2015, 2017 * Matthew R. Green * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #ifndef lint __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006, 2008,\ 2009, 2010, 2011, 2015, 2017 Matthew R. Green. All rights reserved."); __FBSDID("$FreeBSD$"); #endif /* not lint */ /* * gzip.c -- GPL free gzip using zlib. 
* * RFC 1950 covers the zlib format * RFC 1951 covers the deflate format * RFC 1952 covers the gzip format * * TODO: * - use mmap where possible * - make bzip2/compress -v/-t/-l support work as well as possible */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* what type of file are we dealing with */ enum filetype { FT_GZIP, #ifndef NO_BZIP2_SUPPORT FT_BZIP2, #endif #ifndef NO_COMPRESS_SUPPORT FT_Z, #endif #ifndef NO_PACK_SUPPORT FT_PACK, #endif #ifndef NO_XZ_SUPPORT FT_XZ, #endif #ifndef NO_LZ_SUPPORT FT_LZ, #endif FT_LAST, FT_UNKNOWN }; #ifndef NO_BZIP2_SUPPORT #include #define BZ2_SUFFIX ".bz2" #define BZIP2_MAGIC "BZh" #endif #ifndef NO_COMPRESS_SUPPORT #define Z_SUFFIX ".Z" #define Z_MAGIC "\037\235" #endif #ifndef NO_PACK_SUPPORT #define PACK_MAGIC "\037\036" #endif #ifndef NO_XZ_SUPPORT #include #define XZ_SUFFIX ".xz" #define XZ_MAGIC "\3757zXZ" #endif #ifndef NO_LZ_SUPPORT #define LZ_SUFFIX ".lz" #define LZ_MAGIC "LZIP" #endif #define GZ_SUFFIX ".gz" #define BUFLEN (64 * 1024) #define GZIP_MAGIC0 0x1F #define GZIP_MAGIC1 0x8B #define GZIP_OMAGIC1 0x9E #define GZIP_TIMESTAMP (off_t)4 #define GZIP_ORIGNAME (off_t)10 #define HEAD_CRC 0x02 #define EXTRA_FIELD 0x04 #define ORIG_NAME 0x08 #define COMMENT 0x10 #define OS_CODE 3 /* Unix */ typedef struct { const char *zipped; int ziplen; const char *normal; /* for unzip - must not be longer than zipped */ } suffixes_t; static suffixes_t suffixes[] = { #define SUFFIX(Z, N) {Z, sizeof Z - 1, N} SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S .xxx */ #ifndef SMALL SUFFIX(GZ_SUFFIX, ""), SUFFIX(".z", ""), SUFFIX("-gz", ""), SUFFIX("-z", ""), SUFFIX("_z", ""), SUFFIX(".taz", ".tar"), SUFFIX(".tgz", ".tar"), #ifndef NO_BZIP2_SUPPORT SUFFIX(BZ2_SUFFIX, ""), SUFFIX(".tbz", ".tar"), SUFFIX(".tbz2", ".tar"), #endif #ifndef NO_COMPRESS_SUPPORT SUFFIX(Z_SUFFIX, ""), #endif #ifndef NO_XZ_SUPPORT SUFFIX(XZ_SUFFIX, 
""), #endif #ifndef NO_LZ_SUPPORT SUFFIX(LZ_SUFFIX, ""), #endif SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */ #endif /* SMALL */ #undef SUFFIX }; #define NUM_SUFFIXES (nitems(suffixes)) #define SUFFIX_MAXLEN 30 static const char gzip_version[] = "FreeBSD gzip 20190107"; #ifndef SMALL static const char gzip_copyright[] = \ " Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green\n" " All rights reserved.\n" "\n" " Redistribution and use in source and binary forms, with or without\n" " modification, are permitted provided that the following conditions\n" " are met:\n" " 1. Redistributions of source code must retain the above copyright\n" " notice, this list of conditions and the following disclaimer.\n" " 2. Redistributions in binary form must reproduce the above copyright\n" " notice, this list of conditions and the following disclaimer in the\n" " documentation and/or other materials provided with the distribution.\n" "\n" " THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n" " IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n" " OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n" " IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n" " INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n" " BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n" " LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n" " AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n" " OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n" " OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n" " SUCH DAMAGE."; #endif static int cflag; /* stdout mode */ static int dflag; /* decompress mode */ static int lflag; /* list mode */ static int numflag = 6; /* gzip -1..-9 value */ static const char *remove_file = NULL; /* file to be removed upon SIGINT */ static int fflag; /* force mode */ #ifndef SMALL 
static int kflag; /* don't delete input files */ static int nflag; /* don't save name/timestamp */ static int Nflag; /* don't restore name/timestamp */ static int qflag; /* quiet mode */ static int rflag; /* recursive mode */ static int tflag; /* test */ static int vflag; /* verbose mode */ static sig_atomic_t print_info = 0; #else #define qflag 0 #define tflag 0 #endif static int exit_value = 0; /* exit value */ static const char *infile; /* name of file coming in */ static void maybe_err(const char *fmt, ...) __printflike(1, 2) __dead2; #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \ !defined(NO_XZ_SUPPORT) static void maybe_errx(const char *fmt, ...) __printflike(1, 2) __dead2; #endif static void maybe_warn(const char *fmt, ...) __printflike(1, 2); static void maybe_warnx(const char *fmt, ...) __printflike(1, 2); static enum filetype file_gettype(u_char *); #ifdef SMALL #define gz_compress(if, of, sz, fn, tm) gz_compress(if, of, sz) #endif static off_t gz_compress(int, int, off_t *, const char *, uint32_t); static off_t gz_uncompress(int, int, char *, size_t, off_t *, const char *); static off_t file_compress(char *, char *, size_t); static off_t file_uncompress(char *, char *, size_t); static void handle_pathname(char *); static void handle_file(char *, struct stat *); static void handle_stdin(void); static void handle_stdout(void); static void print_ratio(off_t, off_t, FILE *); static void print_list(int fd, off_t, const char *, time_t); static void usage(void) __dead2; static void display_version(void) __dead2; #ifndef SMALL static void display_license(void); #endif static const suffixes_t *check_suffix(char *, int); static ssize_t read_retry(int, void *, size_t); static ssize_t write_retry(int, const void *, size_t); static void print_list_out(off_t, off_t, const char*); #ifdef SMALL #define infile_set(f,t) infile_set(f) #endif static void infile_set(const char *newinfile, off_t total); #ifdef SMALL #define unlink_input(f, sb) unlink(f) 
#define check_siginfo() /* nothing */ #define setup_signals() /* nothing */ #define infile_newdata(t) /* nothing */ #else static off_t infile_total; /* total expected to read/write */ static off_t infile_current; /* current read/write */ static void check_siginfo(void); static off_t cat_fd(unsigned char *, size_t, off_t *, int fd); static void prepend_gzip(char *, int *, char ***); static void handle_dir(char *); static void print_verbage(const char *, const char *, off_t, off_t); static void print_test(const char *, int); static void copymodes(int fd, const struct stat *, const char *file); static int check_outfile(const char *outfile); static void setup_signals(void); static void infile_newdata(size_t newdata); static void infile_clear(void); #endif #ifndef NO_BZIP2_SUPPORT static off_t unbzip2(int, int, char *, size_t, off_t *); #endif #ifndef NO_COMPRESS_SUPPORT static FILE *zdopen(int); static off_t zuncompress(FILE *, FILE *, char *, size_t, off_t *); #endif #ifndef NO_PACK_SUPPORT static off_t unpack(int, int, char *, size_t, off_t *); #endif #ifndef NO_XZ_SUPPORT static off_t unxz(int, int, char *, size_t, off_t *); static off_t unxz_len(int); #endif #ifndef NO_LZ_SUPPORT static off_t unlz(int, int, char *, size_t, off_t *); #endif #ifdef SMALL #define getopt_long(a,b,c,d,e) getopt(a,b,c) #else static const struct option longopts[] = { { "stdout", no_argument, 0, 'c' }, { "to-stdout", no_argument, 0, 'c' }, { "decompress", no_argument, 0, 'd' }, { "uncompress", no_argument, 0, 'd' }, { "force", no_argument, 0, 'f' }, { "help", no_argument, 0, 'h' }, { "keep", no_argument, 0, 'k' }, { "list", no_argument, 0, 'l' }, { "no-name", no_argument, 0, 'n' }, { "name", no_argument, 0, 'N' }, { "quiet", no_argument, 0, 'q' }, { "recursive", no_argument, 0, 'r' }, { "suffix", required_argument, 0, 'S' }, { "test", no_argument, 0, 't' }, { "verbose", no_argument, 0, 'v' }, { "version", no_argument, 0, 'V' }, { "fast", no_argument, 0, '1' }, { "best", no_argument, 0, '9' 
}, { "ascii", no_argument, 0, 'a' }, { "license", no_argument, 0, 'L' }, { NULL, no_argument, 0, 0 }, }; #endif int main(int argc, char **argv) { const char *progname = getprogname(); #ifndef SMALL char *gzip; int len; #endif int ch; setup_signals(); #ifndef SMALL if ((gzip = getenv("GZIP")) != NULL) prepend_gzip(gzip, &argc, &argv); #endif /* * XXX * handle being called `gunzip', `zcat' and `gzcat' */ if (strcmp(progname, "gunzip") == 0) dflag = 1; else if (strcmp(progname, "zcat") == 0 || strcmp(progname, "gzcat") == 0) dflag = cflag = 1; #ifdef SMALL #define OPT_LIST "123456789cdhlV" #else #define OPT_LIST "123456789acdfhklLNnqrS:tVv" #endif while ((ch = getopt_long(argc, argv, OPT_LIST, longopts, NULL)) != -1) { switch (ch) { case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': numflag = ch - '0'; break; case 'c': cflag = 1; break; case 'd': dflag = 1; break; case 'l': lflag = 1; dflag = 1; break; case 'V': display_version(); /* NOTREACHED */ #ifndef SMALL case 'a': fprintf(stderr, "%s: option --ascii ignored on this system\n", progname); break; case 'f': fflag = 1; break; case 'k': kflag = 1; break; case 'L': display_license(); /* NOT REACHED */ case 'N': nflag = 0; Nflag = 1; break; case 'n': nflag = 1; Nflag = 0; break; case 'q': qflag = 1; break; case 'r': rflag = 1; break; case 'S': len = strlen(optarg); if (len != 0) { if (len > SUFFIX_MAXLEN) errx(1, "incorrect suffix: '%s': too long", optarg); suffixes[0].zipped = optarg; suffixes[0].ziplen = len; } else { suffixes[NUM_SUFFIXES - 1].zipped = ""; suffixes[NUM_SUFFIXES - 1].ziplen = 0; } break; case 't': cflag = 1; tflag = 1; dflag = 1; break; case 'v': vflag = 1; break; #endif default: usage(); /* NOTREACHED */ } } argv += optind; argc -= optind; if (argc == 0) { if (dflag) /* stdin mode */ handle_stdin(); else /* stdout mode */ handle_stdout(); } else { do { handle_pathname(argv[0]); } while (*++argv); } #ifndef SMALL if (qflag == 0 && lflag && argc > 1) 
print_list(-1, 0, "(totals)", 0); #endif exit(exit_value); } /* maybe print a warning */ void maybe_warn(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarn(fmt, ap); va_end(ap); } if (exit_value == 0) exit_value = 1; } /* ... without an errno. */ void maybe_warnx(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarnx(fmt, ap); va_end(ap); } if (exit_value == 0) exit_value = 1; } /* maybe print an error */ void maybe_err(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarn(fmt, ap); va_end(ap); } exit(2); } #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \ !defined(NO_XZ_SUPPORT) /* ... without an errno. */ void maybe_errx(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarnx(fmt, ap); va_end(ap); } exit(2); } #endif #ifndef SMALL /* split up $GZIP and prepend it to the argument list */ static void prepend_gzip(char *gzip, int *argc, char ***argv) { char *s, **nargv, **ac; int nenvarg = 0, i; /* scan how many arguments there are */ for (s = gzip;;) { while (*s == ' ' || *s == '\t') s++; if (*s == 0) goto count_done; nenvarg++; while (*s != ' ' && *s != '\t') if (*s++ == 0) goto count_done; } count_done: /* punt early */ if (nenvarg == 0) return; *argc += nenvarg; ac = *argv; nargv = (char **)malloc((*argc + 1) * sizeof(char *)); if (nargv == NULL) maybe_err("malloc"); /* stash this away */ *argv = nargv; /* copy the program name first */ i = 0; nargv[i++] = *(ac++); /* take a copy of $GZIP and add it to the array */ s = strdup(gzip); if (s == NULL) maybe_err("strdup"); for (;;) { /* Skip whitespaces. */ while (*s == ' ' || *s == '\t') s++; if (*s == 0) goto copy_done; nargv[i++] = s; /* Find the end of this argument. */ while (*s != ' ' && *s != '\t') if (*s++ == 0) /* Argument followed by NUL. */ goto copy_done; /* Terminate by overwriting ' ' or '\t' with NUL. 
*/ *s++ = 0; } copy_done: /* copy the original arguments and a NULL */ while (*ac) nargv[i++] = *(ac++); nargv[i] = NULL; } #endif /* compress input to output. Return bytes read, -1 on error */ static off_t gz_compress(int in, int out, off_t *gsizep, const char *origname, uint32_t mtime) { z_stream z; char *outbufp, *inbufp; off_t in_tot = 0, out_tot = 0; ssize_t in_size; int i, error; uLong crc; #ifdef SMALL static char header[] = { GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, 0, 0, 0, 0, 0, 0, OS_CODE }; #endif outbufp = malloc(BUFLEN); inbufp = malloc(BUFLEN); if (outbufp == NULL || inbufp == NULL) { maybe_err("malloc failed"); goto out; } memset(&z, 0, sizeof z); z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = 0; #ifdef SMALL memcpy(outbufp, header, sizeof header); i = sizeof header; #else if (nflag != 0) { mtime = 0; origname = ""; } i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c%c%c%s", GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, *origname ? ORIG_NAME : 0, mtime & 0xff, (mtime >> 8) & 0xff, (mtime >> 16) & 0xff, (mtime >> 24) & 0xff, numflag == 1 ? 4 : numflag == 9 ? 2 : 0, OS_CODE, origname); if (i >= BUFLEN) /* this need PATH_MAX > BUFLEN ... 
*/ maybe_err("snprintf"); if (*origname) i++; #endif z.next_out = (unsigned char *)outbufp + i; z.avail_out = BUFLEN - i; error = deflateInit2(&z, numflag, Z_DEFLATED, (-MAX_WBITS), 8, Z_DEFAULT_STRATEGY); if (error != Z_OK) { maybe_warnx("deflateInit2 failed"); in_tot = -1; goto out; } crc = crc32(0L, Z_NULL, 0); for (;;) { if (z.avail_out == 0) { if (write_retry(out, outbufp, BUFLEN) != BUFLEN) { maybe_warn("write"); out_tot = -1; goto out; } out_tot += BUFLEN; z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; } if (z.avail_in == 0) { in_size = read(in, inbufp, BUFLEN); if (in_size < 0) { maybe_warn("read"); in_tot = -1; goto out; } if (in_size == 0) break; infile_newdata(in_size); crc = crc32(crc, (const Bytef *)inbufp, (unsigned)in_size); in_tot += in_size; z.next_in = (unsigned char *)inbufp; z.avail_in = in_size; } error = deflate(&z, Z_NO_FLUSH); if (error != Z_OK && error != Z_STREAM_END) { maybe_warnx("deflate failed"); in_tot = -1; goto out; } } /* clean up */ for (;;) { size_t len; ssize_t w; error = deflate(&z, Z_FINISH); if (error != Z_OK && error != Z_STREAM_END) { maybe_warnx("deflate failed"); in_tot = -1; goto out; } len = (char *)z.next_out - outbufp; w = write_retry(out, outbufp, len); if (w == -1 || (size_t)w != len) { maybe_warn("write"); out_tot = -1; goto out; } out_tot += len; z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; if (error == Z_STREAM_END) break; } if (deflateEnd(&z) != Z_OK) { maybe_warnx("deflateEnd failed"); in_tot = -1; goto out; } i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c", (int)crc & 0xff, (int)(crc >> 8) & 0xff, (int)(crc >> 16) & 0xff, (int)(crc >> 24) & 0xff, (int)in_tot & 0xff, (int)(in_tot >> 8) & 0xff, (int)(in_tot >> 16) & 0xff, (int)(in_tot >> 24) & 0xff); if (i != 8) maybe_err("snprintf"); if (write_retry(out, outbufp, i) != i) { maybe_warn("write"); in_tot = -1; } else out_tot += i; out: if (inbufp != NULL) free(inbufp); if (outbufp != NULL) free(outbufp); if (gsizep) *gsizep = 
out_tot; return in_tot; } /* * uncompress input to output then close the input. return the * uncompressed size written, and put the compressed sized read * into `*gsizep'. */ static off_t gz_uncompress(int in, int out, char *pre, size_t prelen, off_t *gsizep, const char *filename) { z_stream z; char *outbufp, *inbufp; off_t out_tot = -1, in_tot = 0; uint32_t out_sub_tot = 0; enum { GZSTATE_MAGIC0, GZSTATE_MAGIC1, GZSTATE_METHOD, GZSTATE_FLAGS, GZSTATE_SKIPPING, GZSTATE_EXTRA, GZSTATE_EXTRA2, GZSTATE_EXTRA3, GZSTATE_ORIGNAME, GZSTATE_COMMENT, GZSTATE_HEAD_CRC1, GZSTATE_HEAD_CRC2, GZSTATE_INIT, GZSTATE_READ, GZSTATE_CRC, GZSTATE_LEN, } state = GZSTATE_MAGIC0; int flags = 0, skip_count = 0; int error = Z_STREAM_ERROR, done_reading = 0; uLong crc = 0; ssize_t wr; int needmore = 0; #define ADVANCE() { z.next_in++; z.avail_in--; } if ((outbufp = malloc(BUFLEN)) == NULL) { maybe_err("malloc failed"); goto out2; } if ((inbufp = malloc(BUFLEN)) == NULL) { maybe_err("malloc failed"); goto out1; } memset(&z, 0, sizeof z); z.avail_in = prelen; z.next_in = (unsigned char *)pre; z.avail_out = BUFLEN; z.next_out = (unsigned char *)outbufp; z.zalloc = NULL; z.zfree = NULL; z.opaque = 0; in_tot = prelen; out_tot = 0; for (;;) { check_siginfo(); if ((z.avail_in == 0 || needmore) && done_reading == 0) { ssize_t in_size; if (z.avail_in > 0) { memmove(inbufp, z.next_in, z.avail_in); } z.next_in = (unsigned char *)inbufp; in_size = read(in, z.next_in + z.avail_in, BUFLEN - z.avail_in); if (in_size == -1) { maybe_warn("failed to read stdin"); goto stop_and_fail; } else if (in_size == 0) { done_reading = 1; } infile_newdata(in_size); z.avail_in += in_size; needmore = 0; in_tot += in_size; } if (z.avail_in == 0) { if (done_reading && state != GZSTATE_MAGIC0) { maybe_warnx("%s: unexpected end of file", filename); goto stop_and_fail; } goto stop; } switch (state) { case GZSTATE_MAGIC0: if (*z.next_in != GZIP_MAGIC0) { if (in_tot > 0) { maybe_warnx("%s: trailing garbage " "ignored", 
filename); exit_value = 2; goto stop; } maybe_warnx("input not gziped (MAGIC0)"); goto stop_and_fail; } ADVANCE(); state++; out_sub_tot = 0; crc = crc32(0L, Z_NULL, 0); break; case GZSTATE_MAGIC1: if (*z.next_in != GZIP_MAGIC1 && *z.next_in != GZIP_OMAGIC1) { maybe_warnx("input not gziped (MAGIC1)"); goto stop_and_fail; } ADVANCE(); state++; break; case GZSTATE_METHOD: if (*z.next_in != Z_DEFLATED) { maybe_warnx("unknown compression method"); goto stop_and_fail; } ADVANCE(); state++; break; case GZSTATE_FLAGS: flags = *z.next_in; ADVANCE(); skip_count = 6; state++; break; case GZSTATE_SKIPPING: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_EXTRA: if ((flags & EXTRA_FIELD) == 0) { state = GZSTATE_ORIGNAME; break; } skip_count = *z.next_in; ADVANCE(); state++; break; case GZSTATE_EXTRA2: skip_count |= ((*z.next_in) << 8); ADVANCE(); state++; break; case GZSTATE_EXTRA3: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_ORIGNAME: if ((flags & ORIG_NAME) == 0) { state++; break; } if (*z.next_in == 0) state++; ADVANCE(); break; case GZSTATE_COMMENT: if ((flags & COMMENT) == 0) { state++; break; } if (*z.next_in == 0) state++; ADVANCE(); break; case GZSTATE_HEAD_CRC1: if (flags & HEAD_CRC) skip_count = 2; else skip_count = 0; state++; break; case GZSTATE_HEAD_CRC2: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_INIT: if (inflateInit2(&z, -MAX_WBITS) != Z_OK) { maybe_warnx("failed to inflateInit"); goto stop_and_fail; } state++; break; case GZSTATE_READ: error = inflate(&z, Z_FINISH); switch (error) { /* Z_BUF_ERROR goes with Z_FINISH... 
*/ case Z_BUF_ERROR: if (z.avail_out > 0 && !done_reading) continue; case Z_STREAM_END: case Z_OK: break; case Z_NEED_DICT: maybe_warnx("Z_NEED_DICT error"); goto stop_and_fail; case Z_DATA_ERROR: maybe_warnx("data stream error"); goto stop_and_fail; case Z_STREAM_ERROR: maybe_warnx("internal stream error"); goto stop_and_fail; case Z_MEM_ERROR: maybe_warnx("memory allocation error"); goto stop_and_fail; default: maybe_warn("unknown error from inflate(): %d", error); } wr = BUFLEN - z.avail_out; if (wr != 0) { crc = crc32(crc, (const Bytef *)outbufp, (unsigned)wr); if ( #ifndef SMALL /* don't write anything with -t */ tflag == 0 && #endif write_retry(out, outbufp, wr) != wr) { maybe_warn("error writing to output"); goto stop_and_fail; } out_tot += wr; out_sub_tot += wr; } if (error == Z_STREAM_END) { inflateEnd(&z); state++; } z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; break; case GZSTATE_CRC: { uLong origcrc; if (z.avail_in < 4) { if (!done_reading) { needmore = 1; continue; } maybe_warnx("truncated input"); goto stop_and_fail; } origcrc = le32dec(&z.next_in[0]); if (origcrc != crc) { maybe_warnx("invalid compressed" " data--crc error"); goto stop_and_fail; } } z.avail_in -= 4; z.next_in += 4; if (!z.avail_in && done_reading) { goto stop; } state++; break; case GZSTATE_LEN: { uLong origlen; if (z.avail_in < 4) { if (!done_reading) { needmore = 1; continue; } maybe_warnx("truncated input"); goto stop_and_fail; } origlen = le32dec(&z.next_in[0]); if (origlen != out_sub_tot) { maybe_warnx("invalid compressed" " data--length error"); goto stop_and_fail; } } z.avail_in -= 4; z.next_in += 4; if (error < 0) { maybe_warnx("decompression error"); goto stop_and_fail; } state = GZSTATE_MAGIC0; break; } continue; stop_and_fail: out_tot = -1; stop: break; } if (state > GZSTATE_INIT) inflateEnd(&z); free(inbufp); out1: free(outbufp); out2: if (gsizep) *gsizep = in_tot; return (out_tot); } #ifndef SMALL /* * set the owner, mode, flags & utimes using the given 
file descriptor. * file is only used in possible warning messages. */ static void copymodes(int fd, const struct stat *sbp, const char *file) { struct timespec times[2]; struct stat sb; /* * If we have no info on the input, give this file some * default values and return.. */ if (sbp == NULL) { mode_t mask = umask(022); (void)fchmod(fd, DEFFILEMODE & ~mask); (void)umask(mask); return; } sb = *sbp; /* if the chown fails, remove set-id bits as-per compress(1) */ if (fchown(fd, sb.st_uid, sb.st_gid) < 0) { if (errno != EPERM) maybe_warn("couldn't fchown: %s", file); sb.st_mode &= ~(S_ISUID|S_ISGID); } /* we only allow set-id and the 9 normal permission bits */ sb.st_mode &= S_ISUID | S_ISGID | S_IRWXU | S_IRWXG | S_IRWXO; if (fchmod(fd, sb.st_mode) < 0) maybe_warn("couldn't fchmod: %s", file); times[0] = sb.st_atim; times[1] = sb.st_mtim; if (futimens(fd, times) < 0) maybe_warn("couldn't futimens: %s", file); /* only try flags if they exist already */ if (sb.st_flags != 0 && fchflags(fd, sb.st_flags) < 0) maybe_warn("couldn't fchflags: %s", file); } #endif /* what sort of file is this? */ static enum filetype file_gettype(u_char *buf) { if (buf[0] == GZIP_MAGIC0 && (buf[1] == GZIP_MAGIC1 || buf[1] == GZIP_OMAGIC1)) return FT_GZIP; else #ifndef NO_BZIP2_SUPPORT if (memcmp(buf, BZIP2_MAGIC, 3) == 0 && buf[3] >= '0' && buf[3] <= '9') return FT_BZIP2; else #endif #ifndef NO_COMPRESS_SUPPORT if (memcmp(buf, Z_MAGIC, 2) == 0) return FT_Z; else #endif #ifndef NO_PACK_SUPPORT if (memcmp(buf, PACK_MAGIC, 2) == 0) return FT_PACK; else #endif #ifndef NO_XZ_SUPPORT if (memcmp(buf, XZ_MAGIC, 4) == 0) /* XXX: We only have 4 bytes */ return FT_XZ; else #endif #ifndef NO_LZ_SUPPORT if (memcmp(buf, LZ_MAGIC, 4) == 0) return FT_LZ; else #endif return FT_UNKNOWN; } #ifndef SMALL /* check the outfile is OK. 
*/ static int check_outfile(const char *outfile) { struct stat sb; int ok = 1; if (lflag == 0 && stat(outfile, &sb) == 0) { if (fflag) unlink(outfile); else if (isatty(STDIN_FILENO)) { char ans[10] = { 'n', '\0' }; /* default */ fprintf(stderr, "%s already exists -- do you wish to " "overwrite (y or n)? " , outfile); (void)fgets(ans, sizeof(ans) - 1, stdin); if (ans[0] != 'y' && ans[0] != 'Y') { fprintf(stderr, "\tnot overwriting\n"); ok = 0; } else unlink(outfile); } else { maybe_warnx("%s already exists -- skipping", outfile); ok = 0; } } return ok; } static void unlink_input(const char *file, const struct stat *sb) { struct stat nsb; if (kflag) return; if (stat(file, &nsb) != 0) /* Must be gone already */ return; if (nsb.st_dev != sb->st_dev || nsb.st_ino != sb->st_ino) /* Definitely a different file */ return; unlink(file); } static void got_sigint(int signo __unused) { if (remove_file != NULL) unlink(remove_file); _exit(2); } static void got_siginfo(int signo __unused) { print_info = 1; } static void setup_signals(void) { signal(SIGINFO, got_siginfo); signal(SIGINT, got_sigint); } static void infile_newdata(size_t newdata) { infile_current += newdata; } #endif static void infile_set(const char *newinfile, off_t total) { if (newinfile) infile = newinfile; #ifndef SMALL infile_total = total; #endif } static void infile_clear(void) { infile = NULL; #ifndef SMALL infile_total = infile_current = 0; #endif } static const suffixes_t * check_suffix(char *file, int xlate) { const suffixes_t *s; int len = strlen(file); char *sp; for (s = suffixes; s != suffixes + NUM_SUFFIXES; s++) { /* if it doesn't fit in "a.suf", don't bother */ if (s->ziplen >= len) continue; sp = file + len - s->ziplen; if (strcmp(s->zipped, sp) != 0) continue; if (xlate) strcpy(sp, s->normal); return s; } return NULL; } /* * compress the given file: create a corresponding .gz file and remove the * original. 
*/ static off_t file_compress(char *file, char *outfile, size_t outsize) { int in; int out; off_t size, in_size; #ifndef SMALL struct stat isb, osb; const suffixes_t *suff; #endif in = open(file, O_RDONLY); if (in == -1) { maybe_warn("can't open %s", file); return (-1); } #ifndef SMALL if (fstat(in, &isb) != 0) { maybe_warn("couldn't stat: %s", file); close(in); return (-1); } #endif #ifndef SMALL if (fstat(in, &isb) != 0) { close(in); maybe_warn("can't stat %s", file); return -1; } infile_set(file, isb.st_size); #endif if (cflag == 0) { #ifndef SMALL if (isb.st_nlink > 1 && fflag == 0) { maybe_warnx("%s has %ju other link%s -- " "skipping", file, (uintmax_t)isb.st_nlink - 1, isb.st_nlink == 1 ? "" : "s"); close(in); return -1; } if (fflag == 0 && (suff = check_suffix(file, 0)) && suff->zipped[0] != 0) { maybe_warnx("%s already has %s suffix -- unchanged", file, suff->zipped); close(in); return (-1); } #endif /* Add (usually) .gz to filename */ if ((size_t)snprintf(outfile, outsize, "%s%s", file, suffixes[0].zipped) >= outsize) memcpy(outfile + outsize - suffixes[0].ziplen - 1, suffixes[0].zipped, suffixes[0].ziplen + 1); #ifndef SMALL if (check_outfile(outfile) == 0) { close(in); return (-1); } #endif } if (cflag == 0) { out = open(outfile, O_WRONLY | O_CREAT | O_EXCL, 0600); if (out == -1) { maybe_warn("could not create output: %s", outfile); fclose(stdin); return (-1); } #ifndef SMALL remove_file = outfile; #endif } else out = STDOUT_FILENO; in_size = gz_compress(in, out, &size, basename(file), (uint32_t)isb.st_mtime); (void)close(in); /* * If there was an error, in_size will be -1. * If we compressed to stdout, just return the size. * Otherwise stat the file and check it is the correct size. * We only blow away the file if we can stat the output and it * has the expected size. */ if (cflag != 0) return in_size == -1 ? 
-1 : size; #ifndef SMALL if (fstat(out, &osb) != 0) { maybe_warn("couldn't stat: %s", outfile); goto bad_outfile; } if (osb.st_size != size) { maybe_warnx("output file: %s wrong size (%ju != %ju), deleting", outfile, (uintmax_t)osb.st_size, (uintmax_t)size); goto bad_outfile; } copymodes(out, &isb, outfile); remove_file = NULL; #endif if (close(out) == -1) maybe_warn("couldn't close output"); /* output is good, ok to delete input */ unlink_input(file, &isb); return (size); #ifndef SMALL bad_outfile: if (close(out) == -1) maybe_warn("couldn't close output"); maybe_warnx("leaving original %s", file); unlink(outfile); return (size); #endif } /* uncompress the given file and remove the original */ static off_t file_uncompress(char *file, char *outfile, size_t outsize) { struct stat isb, osb; off_t size; ssize_t rbytes; - unsigned char header1[4]; + unsigned char fourbytes[4]; enum filetype method; int fd, ofd, zfd = -1; int error; size_t in_size; #ifndef SMALL ssize_t rv; time_t timestamp = 0; char name[PATH_MAX + 1]; #endif /* gather the old name info */ fd = open(file, O_RDONLY); if (fd < 0) { maybe_warn("can't open %s", file); goto lose; } if (fstat(fd, &isb) != 0) { maybe_warn("can't stat %s", file); goto lose; } if (S_ISREG(isb.st_mode)) in_size = isb.st_size; else in_size = 0; infile_set(file, in_size); strlcpy(outfile, file, outsize); if (check_suffix(outfile, 1) == NULL && !(cflag || lflag)) { maybe_warnx("%s: unknown suffix -- ignored", file); goto lose; } - rbytes = read(fd, header1, sizeof header1); - if (rbytes != sizeof header1) { + rbytes = read(fd, fourbytes, sizeof fourbytes); + if (rbytes != sizeof fourbytes) { /* we don't want to fail here. 
*/ #ifndef SMALL if (fflag) goto lose; #endif if (rbytes == -1) maybe_warn("can't read %s", file); else goto unexpected_EOF; goto lose; } infile_newdata(rbytes); - method = file_gettype(header1); + method = file_gettype(fourbytes); #ifndef SMALL if (fflag == 0 && method == FT_UNKNOWN) { maybe_warnx("%s: not in gzip format", file); goto lose; } #endif #ifndef SMALL if (method == FT_GZIP && Nflag) { unsigned char ts[4]; /* timestamp */ rv = pread(fd, ts, sizeof ts, GZIP_TIMESTAMP); if (rv >= 0 && rv < (ssize_t)(sizeof ts)) goto unexpected_EOF; if (rv == -1) { if (!fflag) maybe_warn("can't read %s", file); goto lose; } infile_newdata(rv); timestamp = le32dec(&ts[0]); - if (header1[3] & ORIG_NAME) { + if (fourbytes[3] & ORIG_NAME) { rbytes = pread(fd, name, sizeof(name) - 1, GZIP_ORIGNAME); if (rbytes < 0) { maybe_warn("can't read %s", file); goto lose; } if (name[0] != '\0') { char *dp, *nf; /* Make sure that name is NUL-terminated */ name[rbytes] = '\0'; /* strip saved directory name */ nf = strrchr(name, '/'); if (nf == NULL) nf = name; else nf++; /* preserve original directory name */ dp = strrchr(file, '/'); if (dp == NULL) dp = file; else dp++; snprintf(outfile, outsize, "%.*s%.*s", (int) (dp - file), file, (int) rbytes, nf); } } } #endif lseek(fd, 0, SEEK_SET); if (cflag == 0 || lflag) { #ifndef SMALL if (isb.st_nlink > 1 && lflag == 0 && fflag == 0) { maybe_warnx("%s has %ju other links -- skipping", file, (uintmax_t)isb.st_nlink - 1); goto lose; } if (nflag == 0 && timestamp) isb.st_mtime = timestamp; if (check_outfile(outfile) == 0) goto lose; #endif } if (cflag) zfd = STDOUT_FILENO; else if (lflag) zfd = -1; else { zfd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600); if (zfd == STDOUT_FILENO) { /* We won't close STDOUT_FILENO later... 
*/ zfd = dup(zfd); close(STDOUT_FILENO); } if (zfd == -1) { maybe_warn("can't open %s", outfile); goto lose; } remove_file = outfile; } switch (method) { #ifndef NO_BZIP2_SUPPORT case FT_BZIP2: /* XXX */ if (lflag) { maybe_warnx("no -l with bzip2 files"); goto lose; } size = unbzip2(fd, zfd, NULL, 0, NULL); break; #endif #ifndef NO_COMPRESS_SUPPORT case FT_Z: { FILE *in, *out; /* XXX */ if (lflag) { maybe_warnx("no -l with Lempel-Ziv files"); goto lose; } if ((in = zdopen(fd)) == NULL) { maybe_warn("zdopen for read: %s", file); goto lose; } out = fdopen(dup(zfd), "w"); if (out == NULL) { maybe_warn("fdopen for write: %s", outfile); fclose(in); goto lose; } size = zuncompress(in, out, NULL, 0, NULL); /* need to fclose() if ferror() is true... */ error = ferror(in); if (error | fclose(in)) { if (error) maybe_warn("failed infile"); else maybe_warn("failed infile fclose"); if (cflag == 0) unlink(outfile); (void)fclose(out); goto lose; } if (fclose(out) != 0) { maybe_warn("failed outfile fclose"); if (cflag == 0) unlink(outfile); goto lose; } break; } #endif #ifndef NO_PACK_SUPPORT case FT_PACK: if (lflag) { maybe_warnx("no -l with packed files"); goto lose; } size = unpack(fd, zfd, NULL, 0, NULL); break; #endif #ifndef NO_XZ_SUPPORT case FT_XZ: if (lflag) { size = unxz_len(fd); print_list_out(in_size, size, file); return -1; } size = unxz(fd, zfd, NULL, 0, NULL); break; #endif #ifndef NO_LZ_SUPPORT case FT_LZ: if (lflag) { maybe_warnx("no -l with lzip files"); goto lose; } size = unlz(fd, zfd, NULL, 0, NULL); break; #endif #ifndef SMALL case FT_UNKNOWN: if (lflag) { maybe_warnx("no -l for unknown filetypes"); goto lose; } size = cat_fd(NULL, 0, NULL, fd); break; #endif default: if (lflag) { print_list(fd, in_size, outfile, isb.st_mtime); close(fd); return -1; /* XXX */ } size = gz_uncompress(fd, zfd, NULL, 0, NULL, file); break; } if (close(fd) != 0) maybe_warn("couldn't close input"); if (zfd != STDOUT_FILENO && close(zfd) != 0) maybe_warn("couldn't close output"); if 
(size == -1) { if (cflag == 0) unlink(outfile); maybe_warnx("%s: uncompress failed", file); return -1; } /* if testing, or we uncompressed to stdout, this is all we need */ #ifndef SMALL if (tflag) return size; #endif /* if we are uncompressing to stdin, don't remove the file. */ if (cflag) return size; /* * if we create a file... */ /* * if we can't stat the file don't remove the file. */ ofd = open(outfile, O_RDWR, 0); if (ofd == -1) { maybe_warn("couldn't open (leaving original): %s", outfile); return -1; } if (fstat(ofd, &osb) != 0) { maybe_warn("couldn't stat (leaving original): %s", outfile); close(ofd); return -1; } if (osb.st_size != size) { maybe_warnx("stat gave different size: %ju != %ju (leaving original)", (uintmax_t)size, (uintmax_t)osb.st_size); close(ofd); unlink(outfile); return -1; } #ifndef SMALL copymodes(ofd, &isb, outfile); remove_file = NULL; #endif close(ofd); unlink_input(file, &isb); return size; unexpected_EOF: maybe_warnx("%s: unexpected end of file", file); lose: if (fd != -1) close(fd); if (zfd != -1 && zfd != STDOUT_FILENO) close(zfd); return -1; } #ifndef SMALL static void check_siginfo(void) { if (print_info == 0) return; if (infile) { if (infile_total) { int pcent = (int)((100.0 * infile_current) / infile_total); fprintf(stderr, "%s: done %llu/%llu bytes %d%%\n", infile, (unsigned long long)infile_current, (unsigned long long)infile_total, pcent); } else fprintf(stderr, "%s: done %llu bytes\n", infile, (unsigned long long)infile_current); } print_info = 0; } static off_t cat_fd(unsigned char * prepend, size_t count, off_t *gsizep, int fd) { char buf[BUFLEN]; off_t in_tot; ssize_t w; in_tot = count; w = write_retry(STDOUT_FILENO, prepend, count); if (w == -1 || (size_t)w != count) { maybe_warn("write to stdout"); return -1; } for (;;) { ssize_t rv; rv = read(fd, buf, sizeof buf); if (rv == 0) break; if (rv < 0) { maybe_warn("read from fd %d", fd); break; } infile_newdata(rv); if (write_retry(STDOUT_FILENO, buf, rv) != rv) { 
maybe_warn("write to stdout"); break; } in_tot += rv; } if (gsizep) *gsizep = in_tot; return (in_tot); } #endif static void handle_stdin(void) { struct stat isb; - unsigned char header1[4]; + unsigned char fourbytes[4]; size_t in_size; off_t usize, gsize; enum filetype method; ssize_t bytes_read; #ifndef NO_COMPRESS_SUPPORT FILE *in; #endif #ifndef SMALL if (fflag == 0 && lflag == 0 && isatty(STDIN_FILENO)) { maybe_warnx("standard input is a terminal -- ignoring"); goto out; } #endif if (fstat(STDIN_FILENO, &isb) < 0) { maybe_warn("fstat"); goto out; } if (S_ISREG(isb.st_mode)) in_size = isb.st_size; else in_size = 0; infile_set("(stdin)", in_size); if (lflag) { print_list(STDIN_FILENO, in_size, infile, isb.st_mtime); goto out; } - bytes_read = read_retry(STDIN_FILENO, header1, sizeof header1); + bytes_read = read_retry(STDIN_FILENO, fourbytes, sizeof fourbytes); if (bytes_read == -1) { maybe_warn("can't read stdin"); goto out; - } else if (bytes_read != sizeof(header1)) { + } else if (bytes_read != sizeof(fourbytes)) { maybe_warnx("(stdin): unexpected end of file"); goto out; } - method = file_gettype(header1); + method = file_gettype(fourbytes); switch (method) { default: #ifndef SMALL if (fflag == 0) { maybe_warnx("unknown compression format"); goto out; } - usize = cat_fd(header1, sizeof header1, &gsize, STDIN_FILENO); + usize = cat_fd(fourbytes, sizeof fourbytes, &gsize, STDIN_FILENO); break; #endif case FT_GZIP: usize = gz_uncompress(STDIN_FILENO, STDOUT_FILENO, - (char *)header1, sizeof header1, &gsize, "(stdin)"); + (char *)fourbytes, sizeof fourbytes, &gsize, "(stdin)"); break; #ifndef NO_BZIP2_SUPPORT case FT_BZIP2: usize = unbzip2(STDIN_FILENO, STDOUT_FILENO, - (char *)header1, sizeof header1, &gsize); + (char *)fourbytes, sizeof fourbytes, &gsize); break; #endif #ifndef NO_COMPRESS_SUPPORT case FT_Z: if ((in = zdopen(STDIN_FILENO)) == NULL) { maybe_warnx("zopen of stdin"); goto out; } - usize = zuncompress(in, stdout, (char *)header1, - sizeof header1, 
&gsize); + usize = zuncompress(in, stdout, (char *)fourbytes, + sizeof fourbytes, &gsize); fclose(in); break; #endif #ifndef NO_PACK_SUPPORT case FT_PACK: usize = unpack(STDIN_FILENO, STDOUT_FILENO, - (char *)header1, sizeof header1, &gsize); + (char *)fourbytes, sizeof fourbytes, &gsize); break; #endif #ifndef NO_XZ_SUPPORT case FT_XZ: usize = unxz(STDIN_FILENO, STDOUT_FILENO, - (char *)header1, sizeof header1, &gsize); + (char *)fourbytes, sizeof fourbytes, &gsize); break; #endif #ifndef NO_LZ_SUPPORT case FT_LZ: usize = unlz(STDIN_FILENO, STDOUT_FILENO, - (char *)header1, sizeof header1, &gsize); + (char *)fourbytes, sizeof fourbytes, &gsize); break; #endif } #ifndef SMALL if (vflag && !tflag && usize != -1 && gsize != -1) print_verbage(NULL, NULL, usize, gsize); if (vflag && tflag) print_test("(stdin)", usize != -1); #else (void)&usize; #endif out: infile_clear(); } static void handle_stdout(void) { off_t gsize; #ifndef SMALL off_t usize; struct stat sb; time_t systime; uint32_t mtime; int ret; infile_set("(stdout)", 0); if (fflag == 0 && isatty(STDOUT_FILENO)) { maybe_warnx("standard output is a terminal -- ignoring"); return; } /* If stdin is a file use its mtime, otherwise use current time */ ret = fstat(STDIN_FILENO, &sb); if (ret < 0) { maybe_warn("Can't stat stdin"); return; } if (S_ISREG(sb.st_mode)) { infile_set("(stdout)", sb.st_size); mtime = (uint32_t)sb.st_mtime; } else { systime = time(NULL); if (systime == -1) { maybe_warn("time"); return; } mtime = (uint32_t)systime; } usize = #endif gz_compress(STDIN_FILENO, STDOUT_FILENO, &gsize, "", mtime); #ifndef SMALL if (vflag && !tflag && usize != -1 && gsize != -1) print_verbage(NULL, NULL, usize, gsize); #endif } /* do what is asked for, for the path name */ static void handle_pathname(char *path) { char *opath = path, *s = NULL; ssize_t len; int slen; struct stat sb; /* check for stdout/stdin */ if (path[0] == '-' && path[1] == '\0') { if (dflag) handle_stdin(); else handle_stdout(); return; } retry: 
if (stat(path, &sb) != 0 || (fflag == 0 && cflag == 0 && lstat(path, &sb) != 0)) { /* lets try .gz if we're decompressing */ if (dflag && s == NULL && errno == ENOENT) { len = strlen(path); slen = suffixes[0].ziplen; s = malloc(len + slen + 1); if (s == NULL) maybe_err("malloc"); memcpy(s, path, len); memcpy(s + len, suffixes[0].zipped, slen + 1); path = s; goto retry; } maybe_warn("can't stat: %s", opath); goto out; } if (S_ISDIR(sb.st_mode)) { #ifndef SMALL if (rflag) handle_dir(path); else #endif maybe_warnx("%s is a directory", path); goto out; } if (S_ISREG(sb.st_mode)) handle_file(path, &sb); else maybe_warnx("%s is not a regular file", path); out: if (s) free(s); } /* compress/decompress a file */ static void handle_file(char *file, struct stat *sbp) { off_t usize, gsize; char outfile[PATH_MAX]; infile_set(file, sbp->st_size); if (dflag) { usize = file_uncompress(file, outfile, sizeof(outfile)); #ifndef SMALL if (vflag && tflag) print_test(file, usize != -1); #endif if (usize == -1) return; gsize = sbp->st_size; } else { gsize = file_compress(file, outfile, sizeof(outfile)); if (gsize == -1) return; usize = sbp->st_size; } infile_clear(); #ifndef SMALL if (vflag && !tflag) print_verbage(file, (cflag) ? 
NULL : outfile, usize, gsize); #endif } #ifndef SMALL /* this is used with -r to recursively descend directories */ static void handle_dir(char *dir) { char *path_argv[2]; FTS *fts; FTSENT *entry; path_argv[0] = dir; path_argv[1] = 0; fts = fts_open(path_argv, FTS_PHYSICAL | FTS_NOCHDIR, NULL); if (fts == NULL) { warn("couldn't fts_open %s", dir); return; } while ((entry = fts_read(fts))) { switch(entry->fts_info) { case FTS_D: case FTS_DP: continue; case FTS_DNR: case FTS_ERR: case FTS_NS: maybe_warn("%s", entry->fts_path); continue; case FTS_F: handle_file(entry->fts_path, entry->fts_statp); } } (void)fts_close(fts); } #endif /* print a ratio - size reduction as a fraction of uncompressed size */ static void print_ratio(off_t in, off_t out, FILE *where) { int percent10; /* 10 * percent */ off_t diff; char buff[8]; int len; diff = in - out/2; if (in == 0 && out == 0) percent10 = 0; else if (diff < 0) /* * Output is more than double size of input! print -99.9% * Quite possibly we've failed to get the original size. */ percent10 = -999; else { /* * We only need 12 bits of result from the final division, * so reduce the values until a 32bit division will suffice. */ while (in > 0x100000) { diff >>= 1; in >>= 1; } if (in != 0) percent10 = ((u_int)diff * 2000) / (u_int)in - 1000; else percent10 = 0; } len = snprintf(buff, sizeof buff, "%2.2d.", percent10); /* Move the '.' to before the last digit */ buff[len - 1] = buff[len - 2]; buff[len - 2] = '.'; fprintf(where, "%5s%%", buff); } #ifndef SMALL /* print compression statistics, and the new name (if there is one!) */ static void print_verbage(const char *file, const char *nfile, off_t usize, off_t gsize) { if (file) fprintf(stderr, "%s:%s ", file, strlen(file) < 7 ? 
"\t\t" : "\t"); print_ratio(usize, gsize, stderr); if (nfile) fprintf(stderr, " -- replaced with %s", nfile); fprintf(stderr, "\n"); fflush(stderr); } /* print test results */ static void print_test(const char *file, int ok) { if (exit_value == 0 && ok == 0) exit_value = 1; fprintf(stderr, "%s:%s %s\n", file, strlen(file) < 7 ? "\t\t" : "\t", ok ? "OK" : "NOT OK"); fflush(stderr); } #endif /* print a file's info ala --list */ /* eg: compressed uncompressed ratio uncompressed_name 354841 1679360 78.8% /usr/pkgsrc/distfiles/libglade-2.0.1.tar */ static void print_list(int fd, off_t out, const char *outfile, time_t ts) { static int first = 1; #ifndef SMALL static off_t in_tot, out_tot; uint32_t crc = 0; #endif off_t in = 0, rv; if (first) { #ifndef SMALL if (vflag) printf("method crc date time "); #endif if (qflag == 0) printf(" compressed uncompressed " "ratio uncompressed_name\n"); } first = 0; /* print totals? */ #ifndef SMALL if (fd == -1) { in = in_tot; out = out_tot; } else #endif { /* read the last 4 bytes - this is the uncompressed size */ rv = lseek(fd, (off_t)(-8), SEEK_END); if (rv != -1) { unsigned char buf[8]; uint32_t usize; rv = read(fd, (char *)buf, sizeof(buf)); if (rv == -1) maybe_warn("read of uncompressed size"); else if (rv != sizeof(buf)) maybe_warnx("read of uncompressed size"); else { usize = le32dec(&buf[4]); in = (off_t)usize; #ifndef SMALL crc = le32dec(&buf[0]); #endif } } } #ifndef SMALL if (vflag && fd == -1) printf(" "); else if (vflag) { char *date = ctime(&ts); /* skip the day, 1/100th second, and year */ date += 4; date[12] = 0; printf("%5s %08x %11s ", "defla"/*XXX*/, crc, date); } in_tot += in; out_tot += out; #else (void)&ts; /* XXX */ #endif print_list_out(out, in, outfile); } static void print_list_out(off_t out, off_t in, const char *outfile) { printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in); print_ratio(in, out, stdout); printf(" %s\n", outfile); } /* display the usage of NetBSD gzip */ static void 
usage(void) { fprintf(stderr, "%s\n", gzip_version); fprintf(stderr, #ifdef SMALL "usage: %s [-" OPT_LIST "] [ [ ...]]\n", #else "usage: %s [-123456789acdfhklLNnqrtVv] [-S .suffix] [ [ ...]]\n" " -1 --fast fastest (worst) compression\n" " -2 .. -8 set compression level\n" " -9 --best best (slowest) compression\n" " -c --stdout write to stdout, keep original files\n" " --to-stdout\n" " -d --decompress uncompress files\n" " --uncompress\n" " -f --force force overwriting & compress links\n" " -h --help display this help\n" " -k --keep don't delete input files during operation\n" " -l --list list compressed file contents\n" " -N --name save or restore original file name and time stamp\n" " -n --no-name don't save original file name or time stamp\n" " -q --quiet output no warnings\n" " -r --recursive recursively compress files in directories\n" " -S .suf use suffix .suf instead of .gz\n" " --suffix .suf\n" " -t --test test compressed file\n" " -V --version display program version\n" " -v --verbose print extra statistics\n", #endif getprogname()); exit(0); } #ifndef SMALL /* display the license information of FreeBSD gzip */ static void display_license(void) { fprintf(stderr, "%s (based on NetBSD gzip 20150113)\n", gzip_version); fprintf(stderr, "%s\n", gzip_copyright); exit(0); } #endif /* display the version of NetBSD gzip */ static void display_version(void) { fprintf(stderr, "%s\n", gzip_version); exit(0); } #ifndef NO_BZIP2_SUPPORT #include "unbzip2.c" #endif #ifndef NO_COMPRESS_SUPPORT #include "zuncompress.c" #endif #ifndef NO_PACK_SUPPORT #include "unpack.c" #endif #ifndef NO_XZ_SUPPORT #include "unxz.c" #endif #ifndef NO_LZ_SUPPORT #include "unlz.c" #endif static ssize_t read_retry(int fd, void *buf, size_t sz) { char *cp = buf; size_t left = MIN(sz, (size_t) SSIZE_MAX); while (left > 0) { ssize_t ret; ret = read(fd, cp, left); if (ret == -1) { return ret; } else if (ret == 0) { break; /* EOF */ } cp += ret; left -= ret; } return sz - left; } static ssize_t 
write_retry(int fd, const void *buf, size_t sz) { const char *cp = buf; size_t left = MIN(sz, (size_t) SSIZE_MAX); while (left > 0) { ssize_t ret; ret = write(fd, cp, left); if (ret == -1) { return ret; } else if (ret == 0) { abort(); /* Can't happen */ } cp += ret; left -= ret; } return sz - left; } Index: stable/12/usr.bin/gzip/unlz.c =================================================================== --- stable/12/usr.bin/gzip/unlz.c (revision 360185) +++ stable/12/usr.bin/gzip/unlz.c (revision 360186) @@ -1,646 +1,644 @@ /* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */ /*- * Copyright (c) 2018 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* Lzd - Educational decompressor for the lzip format Copyright (C) 2013-2018 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include #include #include #include #include #include #include #include #define LZ_STATES 12 #define LITERAL_CONTEXT_BITS 3 #define POS_STATE_BITS 2 #define POS_STATES (1 << POS_STATE_BITS) #define POS_STATE_MASK (POS_STATES - 1) #define STATES 4 #define DIS_SLOT_BITS 6 #define DIS_MODEL_START 4 #define DIS_MODEL_END 14 #define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2)) #define DIS_ALIGN_BITS 4 #define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS) #define LOW_BITS 3 #define MID_BITS 3 #define HIGH_BITS 8 #define LOW_SYMBOLS (1 << LOW_BITS) #define MID_SYMBOLS (1 << MID_BITS) #define HIGH_SYMBOLS (1 << HIGH_BITS) #define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS) #define MIN_MATCH_LEN 2 #define BIT_MODEL_MOVE_BITS 5 #define BIT_MODEL_TOTAL_BITS 11 #define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS) #define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2) static const int lz_st_next[] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, }; static bool lz_st_is_char(int st) { return st < 7; } static int lz_st_get_char(int st) { return lz_st_next[st]; } static int lz_st_get_match(int st) { return st < 7 ? 7 : 10; } static int lz_st_get_rep(int st) { return st < 7 ? 8 : 11; } static int lz_st_get_short_rep(int st) { return st < 7 ? 
9 : 11;
}

/*
 * Bit models used by lz_rd_decode_len() to decode a length.
 * choice1 and choice2 select between the low, mid and high symbol ranges;
 * the low and mid trees are kept separately for every position state.
 */
struct lz_len_model {
	int choice1;
	int choice2;
	int bm_low[POS_STATES][LOW_SYMBOLS];
	int bm_mid[POS_STATES][MID_SYMBOLS];
	int bm_high[HIGH_SYMBOLS];
};

/* CRC lookup table, one entry per byte value; filled by lz_crc_init(). */
static uint32_t lz_crc[256];

/*
 * Fill lz_crc[]: every byte value is run through eight shift/xor steps
 * with the constant 0xEDB88320.  Entry 0 is 0 by construction.
 */
static void
lz_crc_init(void)
{
	for (unsigned i = 0; i < nitems(lz_crc); i++) {
		unsigned c = i;
		for (unsigned j = 0; j < 8; j++) {
			if (c & 1)
				c = 0xEDB88320U ^ (c >> 1);
			else
				c >>= 1;
		}
		lz_crc[i] = c;
	}
}

/*
 * Fold `len' bytes from `buf' into the running CRC at *crc, in place.
 * No initial value or final inversion is applied here; that is left to
 * the code that owns the CRC.
 */
static void
lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
{
	for (size_t i = 0; i < len; i++)
		*crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
}

/* Range decoder state: the input stream plus the current code and range. */
struct lz_range_decoder {
	FILE *fp;
	uint32_t code;
	uint32_t range;
};

/*
 * Initialise `rd' to read from `fp' and prime `code' with the next five
 * input bytes (code is 32 bits wide, so the first byte is shifted out).
 *
 * Returns -1 if the stream's error indicator is set afterwards, else 0.
 * End of file is not detected here: the result of getc() is converted to
 * uint8_t and shifted in like any other byte.
 */
static int
lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
{
	rd->fp = fp;
	rd->code = 0;
	rd->range = ~0;
	for (int i = 0; i < 5; i++)
		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
	return ferror(rd->fp) ? -1 : 0;
}

/*
 * Decode `num_bits' bits without a bit model, most significant bit first.
 * Each bit halves the range; a new input byte is pulled in whenever the
 * range drops to 24 bits or fewer.
 */
static unsigned
lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
{
	unsigned symbol = 0;

	for (int i = num_bits; i > 0; i--) {
		rd->range >>= 1;
		symbol <<= 1;
		if (rd->code >= rd->range) {
			rd->code -= rd->range;
			symbol |= 1;
		}
		if (rd->range <= 0x00FFFFFFU) {
			rd->range <<= 8;
			rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
		}
	}

	return symbol;
}

/*
 * Decode one bit using the adaptive bit model *bm and update the model:
 * it moves up after a 0 and down after a 1, by a fraction set through
 * BIT_MODEL_MOVE_BITS.  Returns the decoded bit (0 or 1).
 */
static unsigned
lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
{
	unsigned symbol;
	/* split point of the current range according to the model */
	const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;

	if(rd->code < bound) {
		rd->range = bound;
		*bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
		symbol = 0;
	}
	else {
		rd->range -= bound;
		rd->code -= bound;
		*bm -= *bm >> BIT_MODEL_MOVE_BITS;
		symbol = 1;
	}

	/* pull in the next input byte once the range gets too small */
	if (rd->range <= 0x00FFFFFFU) {
		rd->range <<= 8;
		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
	}
	return symbol;
}

/*
 * Decode a `num_bits'-bit symbol, most significant bit first.  The bits
 * decoded so far (below a leading 1 marker) index the model in `bm' used
 * for the next bit; the marker is subtracted from the result.
 */
static unsigned
lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
{
	unsigned symbol = 1;

	for (int i = 0; i < num_bits; i++)
		symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);

	return symbol - (1 << num_bits);
}

static unsigned
lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int
*bm, int num_bits) { unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits); unsigned reversed_symbol = 0; for (int i = 0; i < num_bits; i++) { reversed_symbol = (reversed_symbol << 1) | (symbol & 1); symbol >>= 1; } return reversed_symbol; } static unsigned lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte) { unsigned symbol = 1; for (int i = 7; i >= 0; i--) { const unsigned match_bit = (match_byte >> i) & 1; const unsigned bit = lz_rd_decode_bit(rd, &bm[symbol + (match_bit << 8) + 0x100]); symbol = (symbol << 1) | bit; if (match_bit != bit) { while (symbol < 0x100) { symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]); } break; } } return symbol & 0xFF; } static unsigned lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm, int pos_state) { if (lz_rd_decode_bit(rd, &lm->choice1) == 0) return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS); if (lz_rd_decode_bit(rd, &lm->choice2) == 0) { return LOW_SYMBOLS + lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS); } return LOW_SYMBOLS + MID_SYMBOLS + lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS); } struct lz_decoder { FILE *fin, *fout; off_t pos, ppos, spos, dict_size; bool wrapped; uint32_t crc; uint8_t *obuf; struct lz_range_decoder rdec; }; static int lz_flush(struct lz_decoder *lz) { off_t offs = lz->pos - lz->spos; if (offs <= 0) return -1; size_t size = (size_t)offs; lz_crc_update(&lz->crc, lz->obuf + lz->spos, size); if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size) return -1; lz->wrapped = lz->pos >= lz->dict_size; if (lz->wrapped) { lz->ppos += lz->pos; lz->pos = 0; } lz->spos = lz->pos; return 0; } static void lz_destroy(struct lz_decoder *lz) { if (lz->fin) fclose(lz->fin); if (lz->fout) fclose(lz->fout); free(lz->obuf); } static int lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size) { memset(lz, 0, sizeof(*lz)); lz->fin = fdopen(dup(fin), "r"); if (lz->fin == NULL) goto out; lz->fout = fdopen(dup(fdout), "w"); if (lz->fout 
== NULL) goto out; lz->pos = lz->ppos = lz->spos = 0; lz->crc = ~0; lz->dict_size = dict_size; lz->wrapped = false; lz->obuf = malloc(dict_size); if (lz->obuf == NULL) goto out; if (lz_rd_create(&lz->rdec, lz->fin) == -1) goto out; return 0; out: lz_destroy(lz); return -1; } static uint8_t lz_peek(const struct lz_decoder *lz, unsigned ahead) { off_t diff = lz->pos - ahead - 1; if (diff >= 0) return lz->obuf[diff]; if (lz->wrapped) return lz->obuf[lz->dict_size + diff]; return 0; } static void lz_put(struct lz_decoder *lz, uint8_t b) { lz->obuf[lz->pos++] = b; if (lz->dict_size == lz->pos) lz_flush(lz); } static off_t lz_get_data_position(const struct lz_decoder *lz) { return lz->ppos + lz->pos; } static unsigned lz_get_crc(const struct lz_decoder *lz) { return lz->crc ^ 0xffffffffU; } static void lz_bm_init(int *a, size_t l) { for (size_t i = 0; i < l; i++) a[i] = BIT_MODEL_INIT; } #define LZ_BM_INIT(a) lz_bm_init(a, nitems(a)) #define LZ_BM_INIT2(a) do { \ size_t l = nitems(a[0]); \ for (size_t i = 0; i < nitems(a); i++) \ lz_bm_init(a[i], l); \ } while (/*CONSTCOND*/0) #define LZ_MODEL_INIT(a) do { \ a.choice1 = BIT_MODEL_INIT; \ a.choice2 = BIT_MODEL_INIT; \ LZ_BM_INIT2(a.bm_low); \ LZ_BM_INIT2(a.bm_mid); \ LZ_BM_INIT(a.bm_high); \ } while (/*CONSTCOND*/0) static bool lz_decode_member(struct lz_decoder *lz) { int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300]; int bm_match[LZ_STATES][POS_STATES]; int bm_rep[4][LZ_STATES]; int bm_len[LZ_STATES][POS_STATES]; int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS]; int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1]; int bm_align[DIS_ALIGN_SIZE]; LZ_BM_INIT2(bm_literal); LZ_BM_INIT2(bm_match); LZ_BM_INIT2(bm_rep); LZ_BM_INIT2(bm_len); LZ_BM_INIT2(bm_dis_slot); LZ_BM_INIT(bm_dis); LZ_BM_INIT(bm_align); struct lz_len_model match_len_model; struct lz_len_model rep_len_model; LZ_MODEL_INIT(match_len_model); LZ_MODEL_INIT(rep_len_model); struct lz_range_decoder *rd = &lz->rdec; unsigned rep[4] = { 0 }; int state = 0; while 
(!feof(lz->fin) && !ferror(lz->fin)) { const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK; // bit 1 if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) { const uint8_t prev_byte = lz_peek(lz, 0); const int literal_state = prev_byte >> (8 - LITERAL_CONTEXT_BITS); int *bm = bm_literal[literal_state]; if (lz_st_is_char(state)) lz_put(lz, lz_rd_decode_tree(rd, bm, 8)); else { int peek = lz_peek(lz, rep[0]); lz_put(lz, lz_rd_decode_matched(rd, bm, peek)); } state = lz_st_get_char(state); continue; } int len; // bit 2 if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) { // bit 3 if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) { // bit 4 if (lz_rd_decode_bit(rd, &bm_len[state][pos_state]) == 0) { state = lz_st_get_short_rep(state); lz_put(lz, lz_peek(lz, rep[0])); continue; } } else { unsigned distance; // bit 4 if (lz_rd_decode_bit(rd, &bm_rep[2][state]) == 0) distance = rep[1]; else { // bit 5 if (lz_rd_decode_bit(rd, &bm_rep[3][state]) == 0) distance = rep[2]; else { distance = rep[3]; rep[3] = rep[2]; } rep[2] = rep[1]; } rep[1] = rep[0]; rep[0] = distance; } state = lz_st_get_rep(state); len = MIN_MATCH_LEN + lz_rd_decode_len(rd, &rep_len_model, pos_state); } else { rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0]; len = MIN_MATCH_LEN + lz_rd_decode_len(rd, &match_len_model, pos_state); const int len_state = MIN(len - MIN_MATCH_LEN, STATES - 1); rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state], DIS_SLOT_BITS); if (rep[0] >= DIS_MODEL_START) { const unsigned dis_slot = rep[0]; const int direct_bits = (dis_slot >> 1) - 1; rep[0] = (2 | (dis_slot & 1)) << direct_bits; if (dis_slot < DIS_MODEL_END) rep[0] += lz_rd_decode_tree_reversed(rd, &bm_dis[rep[0] - dis_slot], direct_bits); else { rep[0] += lz_rd_decode(rd, direct_bits - DIS_ALIGN_BITS) << DIS_ALIGN_BITS; rep[0] += lz_rd_decode_tree_reversed(rd, bm_align, DIS_ALIGN_BITS); if (rep[0] == 0xFFFFFFFFU) { lz_flush(lz); return len == MIN_MATCH_LEN; } } } state = lz_st_get_match(state); if 
(rep[0] >= lz->dict_size || (rep[0] >= lz->pos && !lz->wrapped)) { lz_flush(lz); return false; } } for (int i = 0; i < len; i++) lz_put(lz, lz_peek(lz, rep[0])); } lz_flush(lz); return false; } /* * 0-3 CRC32 of the uncompressed data * 4-11 size of the uncompressed data * 12-19 member size including header and trailer */ #define TRAILER_SIZE 20 static off_t lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize) { struct lz_decoder lz; off_t rv = -1; if (lz_create(&lz, fin, fdout, dict_size) == -1) return -1; if (!lz_decode_member(&lz)) goto out; uint8_t trailer[TRAILER_SIZE]; for(size_t i = 0; i < nitems(trailer); i++) trailer[i] = (uint8_t)getc(lz.fin); unsigned crc = 0; for (int i = 3; i >= 0; --i) { crc <<= 8; crc += trailer[i]; } int64_t data_size = 0; for (int i = 11; i >= 4; --i) { data_size <<= 8; data_size += trailer[i]; } if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz)) goto out; rv = 0; for (int i = 19; i >= 12; --i) { rv <<= 8; rv += trailer[i]; } if (insize) *insize = rv; #if 0 /* Does not work with pipes */ rv = ftello(lz.fout); #else rv = data_size; #endif out: lz_destroy(&lz); return rv; } /* * 0-3 magic * 4 version * 5 coded dict_size */ #define HDR_SIZE 6 #define MIN_DICTIONARY_SIZE (1 << 12) #define MAX_DICTIONARY_SIZE (1 << 29) static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 }; static unsigned lz_get_dict_size(unsigned char c) { unsigned dict_size = 1 << (c & 0x1f); dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7); if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE) return 0; return dict_size; } static off_t unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in) { if (lz_crc[0] == 0) lz_crc_init(); char header[HDR_SIZE]; - if (prelen > sizeof(header)) - return -1; if (pre && prelen) memcpy(header, pre, prelen); ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen); switch (nr) { case -1: return -1; case 0: return prelen ? 
-1 : 0; default: if ((size_t)nr != sizeof(header) - prelen) return -1; break; } if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0) return -1; unsigned dict_size = lz_get_dict_size(header[5]); if (dict_size == 0) return -1; return lz_decode(fin, fout, dict_size, bytes_in); } Index: stable/12/usr.bin/gzip/unpack.c =================================================================== --- stable/12/usr.bin/gzip/unpack.c (revision 360185) +++ stable/12/usr.bin/gzip/unpack.c (revision 360186) @@ -1,340 +1,337 @@ /* $FreeBSD$ */ /* $NetBSD: unpack.c,v 1.3 2017/08/04 07:27:08 mrg Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Xin LI * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ /* This file is #included by gzip.c */ /* * pack(1) file format: * * The first 7 bytes is the header: * 00, 01 - Signature (US, RS), we already validated it earlier. * 02..05 - Uncompressed size * 06 - Level for the huffman tree (<=24) * * pack(1) will then store symbols (leaf) nodes count in each huffman * tree levels, each level would consume 1 byte (See [1]). * * After the symbol count table, there is the symbol table, storing * symbols represented by corresponding leaf node. EOB is not being * explicitly transmitted (not necessary anyway) in the symbol table. * * Compressed data goes after the symbol table. * * NOTES * * [1] If we count EOB into the symbols, that would mean that we will * have at most 256 symbols in the huffman tree. pack(1) rejects empty * file and files that just repeats one character, which means that we * will have at least 2 symbols. Therefore, pack(1) would reduce the * last level symbol count by 2 which makes it a number in * range [0..254], so all levels' symbol count would fit into 1 byte. */ #define PACK_HEADER_LENGTH 7 #define HTREE_MAXLEVEL 24 /* * unpack descriptor * * Represent the huffman tree in a similar way that pack(1) would * store in a packed file. We store all symbols in a linear table, * and store pointers to each level's first symbol. In addition to * that, maintain two counts for each level: inner nodes count and * leaf nodes count. 
*/
typedef struct {
	int		symbol_size;	/* Size of the symbol table */
	int		treelevels;	/* Levels for the huffman tree;
					 * unpack_parse_header() decrements
					 * it after allocating the tables, so
					 * it then holds the highest level
					 * index */
	int		*symbolsin;	/* Table of leaf symbols count in each
					 * level */
	int		*inodesin;	/* Table of internal nodes count in
					 * each level */
	char		*symbol;	/* The symbol table */
	char		*symbol_eob;	/* Pointer to the EOB symbol */
	char		**tree;		/* Decoding huffman tree (pointers to
					 * first symbol of each tree level) */
	off_t		uncompressed_size; /* Uncompressed size */
	FILE		*fpIn;		/* Input stream */
	FILE		*fpOut;		/* Output stream */
} unpack_descriptor_t;

/*
 * Release resource allocated to an unpack descriptor.
 *
 * Caller is responsible to make sure that all of these pointers are
 * initialized (in our case, they all point to valid memory block).
 * We don't zero out pointers here because nobody else would ever
 * reference the memory block without scrubbing them.
 *
 * Both streams are closed as well.
 * NOTE(review): the results of fclose() are discarded, so an error while
 * flushing fpOut is not reported from here -- confirm that the caller
 * does not rely on this function to detect write failures.
 */
static void
unpack_descriptor_fini(unpack_descriptor_t *unpackd)
{

	free(unpackd->symbolsin);
	free(unpackd->inodesin);
	free(unpackd->symbol);
	free(unpackd->tree);

	fclose(unpackd->fpIn);
	fclose(unpackd->fpOut);
}

/*
 * Recursively fill the internal node count table, from `level' down to
 * the last level.  The descriptor is const, but the inodesin table it
 * points to is written.
 */
static void
unpackd_fill_inodesin(const unpack_descriptor_t *unpackd, int level)
{

	/*
	 * The internal nodes would be 1/2 of total internal nodes and
	 * leaf nodes in the next level.  For the last level there
	 * would be no internal node by definition.
	 */
	if (level < unpackd->treelevels) {
		/* the next level must be known before this one */
		unpackd_fill_inodesin(unpackd, level + 1);
		unpackd->inodesin[level] = (unpackd->inodesin[level + 1] +
		    unpackd->symbolsin[level + 1]) / 2;
	} else
		unpackd->inodesin[level] = 0;
}

/*
 * Update counter for accepted bytes: add `newbytes' to *bytes_in.
 * A NULL bytes_in means the caller does not want the count.
 */
static void
accepted_bytes(off_t *bytes_in, off_t newbytes)
{

	if (bytes_in != NULL)
		(*bytes_in) += newbytes;
}

/*
 * Read file header and construct the tree.  Also, prepare the buffered I/O
 * for decode routine.
 *
 * Nothing is returned; the uncompressed size read from the header is
 * stored in unpackd->uncompressed_size.
*/ static void unpack_parse_header(int in, int out, char *pre, size_t prelen, off_t *bytes_in, unpack_descriptor_t *unpackd) { unsigned char hdr[PACK_HEADER_LENGTH]; /* buffer for header */ ssize_t bytesread; /* Bytes read from the file */ int i, j, thisbyte; - if (prelen > sizeof hdr) - maybe_err("prelen too long"); - /* Prepend the header buffer if we already read some data */ if (prelen != 0) memcpy(hdr, pre, prelen); /* Read in and fill the rest bytes of header */ bytesread = read(in, hdr + prelen, PACK_HEADER_LENGTH - prelen); if (bytesread < 0) maybe_err("Error reading pack header"); infile_newdata(bytesread); accepted_bytes(bytes_in, PACK_HEADER_LENGTH); /* Obtain uncompressed length (bytes 2,3,4,5) */ unpackd->uncompressed_size = 0; for (i = 2; i <= 5; i++) { unpackd->uncompressed_size <<= 8; unpackd->uncompressed_size |= hdr[i]; } /* Get the levels of the tree */ unpackd->treelevels = hdr[6]; if (unpackd->treelevels > HTREE_MAXLEVEL || unpackd->treelevels < 1) maybe_errx("Huffman tree has insane levels"); /* Let libc take care for buffering from now on */ if ((unpackd->fpIn = fdopen(in, "r")) == NULL) maybe_err("Can not fdopen() input stream"); if ((unpackd->fpOut = fdopen(out, "w")) == NULL) maybe_err("Can not fdopen() output stream"); /* Allocate for the tables of bounds and the tree itself */ unpackd->inodesin = calloc(unpackd->treelevels, sizeof(*(unpackd->inodesin))); unpackd->symbolsin = calloc(unpackd->treelevels, sizeof(*(unpackd->symbolsin))); unpackd->tree = calloc(unpackd->treelevels, (sizeof(*(unpackd->tree)))); if (unpackd->inodesin == NULL || unpackd->symbolsin == NULL || unpackd->tree == NULL) maybe_err("calloc"); /* We count from 0 so adjust to match array upper bound */ unpackd->treelevels--; /* Read the levels symbol count table and calculate total */ unpackd->symbol_size = 1; /* EOB */ for (i = 0; i <= unpackd->treelevels; i++) { if ((thisbyte = fgetc(unpackd->fpIn)) == EOF) maybe_err("File appears to be truncated"); 
unpackd->symbolsin[i] = (unsigned char)thisbyte; unpackd->symbol_size += unpackd->symbolsin[i]; } accepted_bytes(bytes_in, unpackd->treelevels); if (unpackd->symbol_size > 256) maybe_errx("Bad symbol table"); infile_newdata(unpackd->treelevels); /* Allocate for the symbol table, point symbol_eob at the beginning */ unpackd->symbol_eob = unpackd->symbol = calloc(1, unpackd->symbol_size); if (unpackd->symbol == NULL) maybe_err("calloc"); /* * Read in the symbol table, which contain [2, 256] symbols. * In order to fit the count in one byte, pack(1) would offset * it by reducing 2 from the actual number from the last level. * * We adjust the last level's symbol count by 1 here, because * the EOB symbol is not being transmitted explicitly. Another * adjustment would be done later afterward. */ unpackd->symbolsin[unpackd->treelevels]++; for (i = 0; i <= unpackd->treelevels; i++) { unpackd->tree[i] = unpackd->symbol_eob; for (j = 0; j < unpackd->symbolsin[i]; j++) { if ((thisbyte = fgetc(unpackd->fpIn)) == EOF) maybe_errx("Symbol table truncated"); *unpackd->symbol_eob++ = (char)thisbyte; } infile_newdata(unpackd->symbolsin[i]); accepted_bytes(bytes_in, unpackd->symbolsin[i]); } /* Now, take account for the EOB symbol as well */ unpackd->symbolsin[unpackd->treelevels]++; /* * The symbolsin table has been constructed now. * Calculate the internal nodes count table based on it. */ unpackd_fill_inodesin(unpackd, 0); } /* * Decode huffman stream, based on the huffman tree. */ static void unpack_decode(const unpack_descriptor_t *unpackd, off_t *bytes_in) { int thislevel, thiscode, thisbyte, inlevelindex; int i; off_t bytes_out = 0; const char *thissymbol; /* The symbol pointer decoded from stream */ /* * Decode huffman. Fetch every bytes from the file, get it * into 'thiscode' bit-by-bit, then output the symbol we got * when one has been found. * * Assumption: sizeof(int) > ((max tree levels + 1) / 8). * bad things could happen if not. 
*/ thislevel = 0; thiscode = thisbyte = 0; while ((thisbyte = fgetc(unpackd->fpIn)) != EOF) { accepted_bytes(bytes_in, 1); infile_newdata(1); check_siginfo(); /* * Split one bit from thisbyte, from highest to lowest, * feed the bit into thiscode, until we got a symbol from * the tree. */ for (i = 7; i >= 0; i--) { thiscode = (thiscode << 1) | ((thisbyte >> i) & 1); /* Did we got a symbol? (referencing leaf node) */ if (thiscode >= unpackd->inodesin[thislevel]) { inlevelindex = thiscode - unpackd->inodesin[thislevel]; if (inlevelindex > unpackd->symbolsin[thislevel]) maybe_errx("File corrupt"); thissymbol = &(unpackd->tree[thislevel][inlevelindex]); if ((thissymbol == unpackd->symbol_eob) && (bytes_out == unpackd->uncompressed_size)) goto finished; fputc((*thissymbol), unpackd->fpOut); bytes_out++; /* Prepare for next input */ thislevel = 0; thiscode = 0; } else { thislevel++; if (thislevel > unpackd->treelevels) maybe_errx("File corrupt"); } } } finished: if (bytes_out != unpackd->uncompressed_size) maybe_errx("Premature EOF"); } /* Handler for pack(1)'ed file */ static off_t unpack(int in, int out, char *pre, size_t prelen, off_t *bytes_in) { unpack_descriptor_t unpackd; in = dup(in); if (in == -1) maybe_err("dup"); out = dup(out); if (out == -1) maybe_err("dup"); unpack_parse_header(in, out, pre, prelen, bytes_in, &unpackd); unpack_decode(&unpackd, bytes_in); unpack_descriptor_fini(&unpackd); /* If we reached here, the unpack was successful */ return (unpackd.uncompressed_size); } Index: stable/12 =================================================================== --- stable/12 (revision 360185) +++ stable/12 (revision 360186) Property changes on: stable/12 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r358988