diff --git a/libarchive/archive_read_support_filter_uu.c b/libarchive/archive_read_support_filter_uu.c index 787a619f2f3a..f0fc14870123 100644 --- a/libarchive/archive_read_support_filter_uu.c +++ b/libarchive/archive_read_support_filter_uu.c @@ -1,695 +1,696 @@ /*- * Copyright (c) 2009-2011 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STRING_H #include #endif #include "archive.h" #include "archive_private.h" #include "archive_read_private.h" /* Maximum lookahead during bid phase */ #define UUENCODE_BID_MAX_READ 128*1024 /* in bytes */ struct uudecode { int64_t total; unsigned char *in_buff; #define IN_BUFF_SIZE (1024) int in_cnt; size_t in_allocated; unsigned char *out_buff; #define OUT_BUFF_SIZE (64 * 1024) int state; #define ST_FIND_HEAD 0 #define ST_READ_UU 1 #define ST_UUEND 2 #define ST_READ_BASE64 3 #define ST_IGNORE 4 }; static int uudecode_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *filter); static int uudecode_bidder_init(struct archive_read_filter *); static ssize_t uudecode_filter_read(struct archive_read_filter *, const void **); static int uudecode_filter_close(struct archive_read_filter *); #if ARCHIVE_VERSION_NUMBER < 4000000 /* Deprecated; remove in libarchive 4.0 */ int archive_read_support_compression_uu(struct archive *a) { return archive_read_support_filter_uu(a); } #endif int archive_read_support_filter_uu(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct archive_read_filter_bidder *bidder; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_filter_uu"); if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK) return (ARCHIVE_FATAL); bidder->data = NULL; bidder->name = "uu"; bidder->bid = uudecode_bidder_bid; bidder->init = uudecode_bidder_init; bidder->options = NULL; bidder->free = NULL; return (ARCHIVE_OK); } static const unsigned char ascii[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\n', 0, 0, '\r', 0, 0, /* 00 - 0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 30 - 3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */ }; static const unsigned char uuchar[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 30 - 3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70 - 7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */ }; static const unsigned char base64[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, /* 20 - 2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 30 - 3F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 50 - 5F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 70 - 7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */ }; static const int base64num[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63, /* 20 - 2F */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, /* 30 - 3F */ 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 40 - 4F */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0, /* 50 - 5F */ 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 60 - 6F */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0, /* 70 - 7F */ }; static ssize_t get_line(const unsigned char *b, ssize_t avail, ssize_t *nlsize) { ssize_t len; len = 0; while (len < avail) { switch (ascii[*b]) { case 0: /* Non-ascii character or control character. */ if (nlsize != NULL) *nlsize = 0; return (-1); case '\r': if (avail-len > 1 && b[1] == '\n') { if (nlsize != NULL) *nlsize = 2; return (len+2); } /* FALL THROUGH */ case '\n': if (nlsize != NULL) *nlsize = 1; return (len+1); case 1: b++; len++; break; } } if (nlsize != NULL) *nlsize = 0; return (avail); } static ssize_t bid_get_line(struct archive_read_filter *filter, const unsigned char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl, size_t* nbytes_read) { ssize_t len; int quit; quit = 0; if (*avail == 0) { *nl = 0; len = 0; } else len = get_line(*b, *avail, nl); /* * Read bytes more while it does not reach the end of line. */ while (*nl == 0 && len == *avail && !quit && *nbytes_read < UUENCODE_BID_MAX_READ) { ssize_t diff = *ravail - *avail; size_t nbytes_req = (*ravail+1023) & ~1023U; ssize_t tested; /* Increase reading bytes if it is not enough to at least * new two lines. */ if (nbytes_req < (size_t)*ravail + 160) nbytes_req <<= 1; *b = __archive_read_filter_ahead(filter, nbytes_req, avail); if (*b == NULL) { if (*ravail >= *avail) return (0); /* Reading bytes reaches the end of a stream. */ *b = __archive_read_filter_ahead(filter, *avail, avail); quit = 1; } *nbytes_read = *avail; *ravail = *avail; *b += diff; *avail -= diff; tested = len;/* Skip some bytes we already determinated. */ len = get_line(*b + tested, *avail - tested, nl); if (len >= 0) len += tested; } return (len); } #define UUDECODE(c) (((c) - 0x20) & 0x3f) static int uudecode_bidder_bid(struct archive_read_filter_bidder *self, struct archive_read_filter *filter) { const unsigned char *b; ssize_t avail, ravail; ssize_t len, nl; int l; int firstline; size_t nbytes_read; (void)self; /* UNUSED */ b = __archive_read_filter_ahead(filter, 1, &avail); if (b == NULL) return (0); firstline = 20; ravail = avail; nbytes_read = avail; for (;;) { len = bid_get_line(filter, &b, &avail, &ravail, &nl, &nbytes_read); if (len < 0 || nl == 0) return (0); /* No match found. */ if (len - nl >= 11 && memcmp(b, "begin ", 6) == 0) l = 6; else if (len -nl >= 18 && memcmp(b, "begin-base64 ", 13) == 0) l = 13; else l = 0; if (l > 0 && (b[l] < '0' || b[l] > '7' || b[l+1] < '0' || b[l+1] > '7' || b[l+2] < '0' || b[l+2] > '7' || b[l+3] != ' ')) l = 0; b += len; avail -= len; if (l) break; firstline = 0; /* Do not read more than UUENCODE_BID_MAX_READ bytes */ if (nbytes_read >= UUENCODE_BID_MAX_READ) return (0); } if (!avail) return (0); len = bid_get_line(filter, &b, &avail, &ravail, &nl, &nbytes_read); if (len < 0 || nl == 0) return (0);/* There are non-ascii characters. */ avail -= len; if (l == 6) { + /* "begin " */ if (!uuchar[*b]) return (0); /* Get a length of decoded bytes. */ l = UUDECODE(*b++); len--; if (l > 45) /* Normally, maximum length is 45(character 'M'). */ return (0); while (l && len-nl > 0) { if (l > 0) { if (!uuchar[*b++]) return (0); if (!uuchar[*b++]) return (0); len -= 2; --l; } if (l > 0) { if (!uuchar[*b++]) return (0); --len; --l; } if (l > 0) { if (!uuchar[*b++]) return (0); --len; --l; } } if (len-nl < 0) return (0); if (len-nl == 1 && (uuchar[*b] || /* Check sum. */ (*b >= 'a' && *b <= 'z'))) {/* Padding data(MINIX). */ ++b; --len; } b += nl; if (avail && uuchar[*b]) return (firstline+30); - } - if (l == 13) { + } else if (l == 13) { + /* "begin-base64 " */ while (len-nl > 0) { if (!base64[*b++]) return (0); --len; } b += nl; if (avail >= 5 && memcmp(b, "====\n", 5) == 0) return (firstline+40); if (avail >= 6 && memcmp(b, "====\r\n", 6) == 0) return (firstline+40); if (avail > 0 && base64[*b]) return (firstline+30); } return (0); } static int uudecode_bidder_init(struct archive_read_filter *self) { struct uudecode *uudecode; void *out_buff; void *in_buff; self->code = ARCHIVE_FILTER_UU; self->name = "uu"; self->read = uudecode_filter_read; self->skip = NULL; /* not supported */ self->close = uudecode_filter_close; uudecode = (struct uudecode *)calloc(sizeof(*uudecode), 1); out_buff = malloc(OUT_BUFF_SIZE); in_buff = malloc(IN_BUFF_SIZE); if (uudecode == NULL || out_buff == NULL || in_buff == NULL) { archive_set_error(&self->archive->archive, ENOMEM, "Can't allocate data for uudecode"); free(uudecode); free(out_buff); free(in_buff); return (ARCHIVE_FATAL); } self->data = uudecode; uudecode->in_buff = in_buff; uudecode->in_cnt = 0; uudecode->in_allocated = IN_BUFF_SIZE; uudecode->out_buff = out_buff; uudecode->state = ST_FIND_HEAD; return (ARCHIVE_OK); } static int ensure_in_buff_size(struct archive_read_filter *self, struct uudecode *uudecode, size_t size) { if (size > uudecode->in_allocated) { unsigned char *ptr; size_t newsize; /* * Calculate a new buffer size for in_buff. * Increase its value until it has enough size we need. */ newsize = uudecode->in_allocated; do { if (newsize < IN_BUFF_SIZE*32) newsize <<= 1; else newsize += IN_BUFF_SIZE; } while (size > newsize); /* Allocate the new buffer. */ ptr = malloc(newsize); if (ptr == NULL) { free(ptr); archive_set_error(&self->archive->archive, ENOMEM, "Can't allocate data for uudecode"); return (ARCHIVE_FATAL); } /* Move the remaining data in in_buff into the new buffer. */ if (uudecode->in_cnt) memmove(ptr, uudecode->in_buff, uudecode->in_cnt); /* Replace in_buff with the new buffer. */ free(uudecode->in_buff); uudecode->in_buff = ptr; uudecode->in_allocated = newsize; } return (ARCHIVE_OK); } static ssize_t uudecode_filter_read(struct archive_read_filter *self, const void **buff) { struct uudecode *uudecode; const unsigned char *b, *d; unsigned char *out; ssize_t avail_in, ravail; ssize_t used; ssize_t total; ssize_t len, llen, nl; uudecode = (struct uudecode *)self->data; read_more: d = __archive_read_filter_ahead(self->upstream, 1, &avail_in); if (d == NULL && avail_in < 0) return (ARCHIVE_FATAL); /* Quiet a code analyzer; make sure avail_in must be zero * when d is NULL. */ if (d == NULL) avail_in = 0; used = 0; total = 0; out = uudecode->out_buff; ravail = avail_in; if (uudecode->state == ST_IGNORE) { used = avail_in; goto finish; } if (uudecode->in_cnt) { /* * If there is remaining data which is saved by * previous calling, use it first. */ if (ensure_in_buff_size(self, uudecode, avail_in + uudecode->in_cnt) != ARCHIVE_OK) return (ARCHIVE_FATAL); memcpy(uudecode->in_buff + uudecode->in_cnt, d, avail_in); d = uudecode->in_buff; avail_in += uudecode->in_cnt; uudecode->in_cnt = 0; } for (;used < avail_in; d += llen, used += llen) { int64_t l, body; b = d; len = get_line(b, avail_in - used, &nl); if (len < 0) { /* Non-ascii character is found. */ if (uudecode->state == ST_FIND_HEAD && (uudecode->total > 0 || total > 0)) { uudecode->state = ST_IGNORE; used = avail_in; goto finish; } archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, "Insufficient compressed data"); return (ARCHIVE_FATAL); } llen = len; if ((nl == 0) && (uudecode->state != ST_UUEND)) { /* * Save remaining data which does not contain * NL('\n','\r'). */ if (ensure_in_buff_size(self, uudecode, len) != ARCHIVE_OK) return (ARCHIVE_FATAL); if (uudecode->in_buff != b) memmove(uudecode->in_buff, b, len); uudecode->in_cnt = (int)len; if (total == 0) { /* Do not return 0; it means end-of-file. * We should try to read bytes more. */ __archive_read_filter_consume( self->upstream, ravail); goto read_more; } used += len; break; } switch (uudecode->state) { default: case ST_FIND_HEAD: /* Do not read more than UUENCODE_BID_MAX_READ bytes */ if (total + len >= UUENCODE_BID_MAX_READ) { archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid format data"); return (ARCHIVE_FATAL); } if (len - nl >= 11 && memcmp(b, "begin ", 6) == 0) l = 6; else if (len - nl >= 18 && memcmp(b, "begin-base64 ", 13) == 0) l = 13; else l = 0; if (l != 0 && b[l] >= '0' && b[l] <= '7' && b[l+1] >= '0' && b[l+1] <= '7' && b[l+2] >= '0' && b[l+2] <= '7' && b[l+3] == ' ') { if (l == 6) uudecode->state = ST_READ_UU; else uudecode->state = ST_READ_BASE64; } break; case ST_READ_UU: if (total + len * 2 > OUT_BUFF_SIZE) goto finish; body = len - nl; if (!uuchar[*b] || body <= 0) { archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, "Insufficient compressed data"); return (ARCHIVE_FATAL); } /* Get length of undecoded bytes of curent line. */ l = UUDECODE(*b++); body--; if (l > body) { archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, "Insufficient compressed data"); return (ARCHIVE_FATAL); } if (l == 0) { uudecode->state = ST_UUEND; break; } while (l > 0) { int n = 0; if (l > 0) { if (!uuchar[b[0]] || !uuchar[b[1]]) break; n = UUDECODE(*b++) << 18; n |= UUDECODE(*b++) << 12; *out++ = n >> 16; total++; --l; } if (l > 0) { if (!uuchar[b[0]]) break; n |= UUDECODE(*b++) << 6; *out++ = (n >> 8) & 0xFF; total++; --l; } if (l > 0) { if (!uuchar[b[0]]) break; n |= UUDECODE(*b++); *out++ = n & 0xFF; total++; --l; } } if (l) { archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, "Insufficient compressed data"); return (ARCHIVE_FATAL); } break; case ST_UUEND: if (len - nl == 3 && memcmp(b, "end ", 3) == 0) uudecode->state = ST_FIND_HEAD; else { archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, "Insufficient compressed data"); return (ARCHIVE_FATAL); } break; case ST_READ_BASE64: if (total + len * 2 > OUT_BUFF_SIZE) goto finish; l = len - nl; if (l >= 3 && b[0] == '=' && b[1] == '=' && b[2] == '=') { uudecode->state = ST_FIND_HEAD; break; } while (l > 0) { int n = 0; if (l > 0) { if (!base64[b[0]] || !base64[b[1]]) break; n = base64num[*b++] << 18; n |= base64num[*b++] << 12; *out++ = n >> 16; total++; l -= 2; } if (l > 0) { if (*b == '=') break; if (!base64[*b]) break; n |= base64num[*b++] << 6; *out++ = (n >> 8) & 0xFF; total++; --l; } if (l > 0) { if (*b == '=') break; if (!base64[*b]) break; n |= base64num[*b++]; *out++ = n & 0xFF; total++; --l; } } if (l && *b != '=') { archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, "Insufficient compressed data"); return (ARCHIVE_FATAL); } break; } } finish: if (ravail < avail_in) used -= avail_in - ravail; __archive_read_filter_consume(self->upstream, used); *buff = uudecode->out_buff; uudecode->total += total; return (total); } static int uudecode_filter_close(struct archive_read_filter *self) { struct uudecode *uudecode; uudecode = (struct uudecode *)self->data; free(uudecode->in_buff); free(uudecode->out_buff); free(uudecode); return (ARCHIVE_OK); } diff --git a/libarchive/archive_read_support_format_cab.c b/libarchive/archive_read_support_format_cab.c index fc70684afa04..099f4a83dcac 100644 --- a/libarchive/archive_read_support_format_cab.c +++ b/libarchive/archive_read_support_format_cab.c @@ -1,3350 +1,3351 @@ /*- * Copyright (c) 2010-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_LIMITS_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_ZLIB_H #include #endif #include "archive.h" #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_private.h" #include "archive_read_private.h" #include "archive_endian.h" struct lzx_dec { /* Decoding status. */ int state; /* * Window to see last decoded data, from 32KBi to 2MBi. */ int w_size; int w_mask; /* Window buffer, which is a loop buffer. */ unsigned char *w_buff; /* The insert position to the window. */ int w_pos; /* The position where we can copy decoded code from the window. */ int copy_pos; /* The length how many bytes we can copy decoded code from * the window. */ int copy_len; /* Translation reversal for x86 proccessor CALL byte sequence(E8). * This is used for LZX only. */ uint32_t translation_size; char translation; char block_type; #define VERBATIM_BLOCK 1 #define ALIGNED_OFFSET_BLOCK 2 #define UNCOMPRESSED_BLOCK 3 size_t block_size; size_t block_bytes_avail; /* Repeated offset. */ int r0, r1, r2; unsigned char rbytes[4]; int rbytes_avail; int length_header; int position_slot; int offset_bits; struct lzx_pos_tbl { int base; int footer_bits; } *pos_tbl; /* * Bit stream reader. */ struct lzx_br { #define CACHE_TYPE uint64_t #define CACHE_BITS (8 * sizeof(CACHE_TYPE)) /* Cache buffer. */ CACHE_TYPE cache_buffer; /* Indicates how many bits avail in cache_buffer. */ int cache_avail; unsigned char odd; char have_odd; } br; /* * Huffman coding. */ struct huffman { int len_size; int freq[17]; unsigned char *bitlen; /* * Use a index table. It's faster than searching a huffman * coding tree, which is a binary tree. But a use of a large * index table causes L1 cache read miss many times. */ #define HTBL_BITS 10 int max_bits; int shift_bits; int tbl_bits; int tree_used; int tree_avail; /* Direct access table. */ uint16_t *tbl; /* Binary tree table for extra bits over the direct access. */ struct htree_t { uint16_t left; uint16_t right; } *tree; } at, lt, mt, pt; int loop; int error; }; static const int slots[] = { 30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290 }; #define SLOT_BASE 15 #define SLOT_MAX 21/*->25*/ struct lzx_stream { const unsigned char *next_in; int64_t avail_in; int64_t total_in; unsigned char *next_out; int64_t avail_out; int64_t total_out; struct lzx_dec *ds; }; /* * Cabinet file definitions. */ /* CFHEADER offset */ #define CFHEADER_signature 0 #define CFHEADER_cbCabinet 8 #define CFHEADER_coffFiles 16 #define CFHEADER_versionMinor 24 #define CFHEADER_versionMajor 25 #define CFHEADER_cFolders 26 #define CFHEADER_cFiles 28 #define CFHEADER_flags 30 #define CFHEADER_setID 32 #define CFHEADER_iCabinet 34 #define CFHEADER_cbCFHeader 36 #define CFHEADER_cbCFFolder 38 #define CFHEADER_cbCFData 39 /* CFFOLDER offset */ #define CFFOLDER_coffCabStart 0 #define CFFOLDER_cCFData 4 #define CFFOLDER_typeCompress 6 #define CFFOLDER_abReserve 8 /* CFFILE offset */ #define CFFILE_cbFile 0 #define CFFILE_uoffFolderStart 4 #define CFFILE_iFolder 8 #define CFFILE_date_time 10 #define CFFILE_attribs 14 /* CFDATA offset */ #define CFDATA_csum 0 #define CFDATA_cbData 4 #define CFDATA_cbUncomp 6 static const char *compression_name[] = { "NONE", "MSZIP", "Quantum", "LZX", }; struct cfdata { /* Sum value of this CFDATA. */ uint32_t sum; uint16_t compressed_size; uint16_t compressed_bytes_remaining; uint16_t uncompressed_size; uint16_t uncompressed_bytes_remaining; /* To know how many bytes we have decompressed. */ uint16_t uncompressed_avail; /* Offset from the beginning of compressed data of this CFDATA */ uint16_t read_offset; int64_t unconsumed; /* To keep memory image of this CFDATA to compute the sum. */ size_t memimage_size; unsigned char *memimage; /* Result of calculation of sum. */ uint32_t sum_calculated; unsigned char sum_extra[4]; int sum_extra_avail; const void *sum_ptr; }; struct cffolder { uint32_t cfdata_offset_in_cab; uint16_t cfdata_count; uint16_t comptype; #define COMPTYPE_NONE 0x0000 #define COMPTYPE_MSZIP 0x0001 #define COMPTYPE_QUANTUM 0x0002 #define COMPTYPE_LZX 0x0003 uint16_t compdata; const char *compname; /* At the time reading CFDATA */ struct cfdata cfdata; int cfdata_index; /* Flags to mark progress of decompression. */ char decompress_init; }; struct cffile { uint32_t uncompressed_size; uint32_t offset; time_t mtime; uint16_t folder; #define iFoldCONTINUED_FROM_PREV 0xFFFD #define iFoldCONTINUED_TO_NEXT 0xFFFE #define iFoldCONTINUED_PREV_AND_NEXT 0xFFFF unsigned char attr; #define ATTR_RDONLY 0x01 #define ATTR_NAME_IS_UTF 0x80 struct archive_string pathname; }; struct cfheader { /* Total bytes of all file size in a Cabinet. */ uint32_t total_bytes; uint32_t files_offset; uint16_t folder_count; uint16_t file_count; uint16_t flags; #define PREV_CABINET 0x0001 #define NEXT_CABINET 0x0002 #define RESERVE_PRESENT 0x0004 uint16_t setid; uint16_t cabinet; /* Version number. */ unsigned char major; unsigned char minor; unsigned char cffolder; unsigned char cfdata; /* All folders in a cabinet. */ struct cffolder *folder_array; /* All files in a cabinet. */ struct cffile *file_array; int file_index; }; struct cab { /* entry_bytes_remaining is the number of bytes we expect. */ int64_t entry_offset; int64_t entry_bytes_remaining; int64_t entry_unconsumed; int64_t entry_compressed_bytes_read; int64_t entry_uncompressed_bytes_read; struct cffolder *entry_cffolder; struct cffile *entry_cffile; struct cfdata *entry_cfdata; /* Offset from beginning of a cabinet file. */ int64_t cab_offset; struct cfheader cfheader; struct archive_wstring ws; /* Flag to mark progress that an archive was read their first header.*/ char found_header; char end_of_archive; char end_of_entry; char end_of_entry_cleanup; char read_data_invoked; int64_t bytes_skipped; unsigned char *uncompressed_buffer; size_t uncompressed_buffer_size; int init_default_conversion; struct archive_string_conv *sconv; struct archive_string_conv *sconv_default; struct archive_string_conv *sconv_utf8; char format_name[64]; #ifdef HAVE_ZLIB_H z_stream stream; char stream_valid; #endif struct lzx_stream xstrm; }; static int archive_read_format_cab_bid(struct archive_read *, int); static int archive_read_format_cab_options(struct archive_read *, const char *, const char *); static int archive_read_format_cab_read_header(struct archive_read *, struct archive_entry *); static int archive_read_format_cab_read_data(struct archive_read *, const void **, size_t *, int64_t *); static int archive_read_format_cab_read_data_skip(struct archive_read *); static int archive_read_format_cab_cleanup(struct archive_read *); static int cab_skip_sfx(struct archive_read *); static time_t cab_dos_time(const unsigned char *); static int cab_read_data(struct archive_read *, const void **, size_t *, int64_t *); static int cab_read_header(struct archive_read *); static uint32_t cab_checksum_cfdata_4(const void *, size_t bytes, uint32_t); static uint32_t cab_checksum_cfdata(const void *, size_t bytes, uint32_t); static void cab_checksum_update(struct archive_read *, size_t); static int cab_checksum_finish(struct archive_read *); static int cab_next_cfdata(struct archive_read *); static const void *cab_read_ahead_cfdata(struct archive_read *, ssize_t *); static const void *cab_read_ahead_cfdata_none(struct archive_read *, ssize_t *); static const void *cab_read_ahead_cfdata_deflate(struct archive_read *, ssize_t *); static const void *cab_read_ahead_cfdata_lzx(struct archive_read *, ssize_t *); static int64_t cab_consume_cfdata(struct archive_read *, int64_t); static int64_t cab_minimum_consume_cfdata(struct archive_read *, int64_t); static int lzx_decode_init(struct lzx_stream *, int); static int lzx_read_blocks(struct lzx_stream *, int); static int lzx_decode_blocks(struct lzx_stream *, int); static void lzx_decode_free(struct lzx_stream *); static void lzx_translation(struct lzx_stream *, void *, size_t, uint32_t); static void lzx_cleanup_bitstream(struct lzx_stream *); static int lzx_decode(struct lzx_stream *, int); static int lzx_read_pre_tree(struct lzx_stream *); static int lzx_read_bitlen(struct lzx_stream *, struct huffman *, int); static int lzx_huffman_init(struct huffman *, size_t, int); static void lzx_huffman_free(struct huffman *); static int lzx_make_huffman_table(struct huffman *); static inline int lzx_decode_huffman(struct huffman *, unsigned); static int lzx_decode_huffman_tree(struct huffman *, unsigned, int); int archive_read_support_format_cab(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct cab *cab; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_cab"); cab = (struct cab *)calloc(1, sizeof(*cab)); if (cab == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate CAB data"); return (ARCHIVE_FATAL); } archive_string_init(&cab->ws); archive_wstring_ensure(&cab->ws, 256); r = __archive_read_register_format(a, cab, "cab", archive_read_format_cab_bid, archive_read_format_cab_options, archive_read_format_cab_read_header, archive_read_format_cab_read_data, archive_read_format_cab_read_data_skip, NULL, archive_read_format_cab_cleanup, NULL, NULL); if (r != ARCHIVE_OK) free(cab); return (ARCHIVE_OK); } static int find_cab_magic(const char *p) { switch (p[4]) { case 0: /* * Note: Self-Extraction program has 'MSCF' string in their * program. If we were finding 'MSCF' string only, we got * wrong place for Cabinet header, thus, we have to check * following four bytes which are reserved and must be set * to zero. */ if (memcmp(p, "MSCF\0\0\0\0", 8) == 0) return 0; return 5; case 'F': return 1; case 'C': return 2; case 'S': return 3; case 'M': return 4; default: return 5; } } static int archive_read_format_cab_bid(struct archive_read *a, int best_bid) { const char *p; ssize_t bytes_avail, offset, window; /* If there's already a better bid than we can ever make, don't bother testing. */ if (best_bid > 64) return (-1); if ((p = __archive_read_ahead(a, 8, NULL)) == NULL) return (-1); if (memcmp(p, "MSCF\0\0\0\0", 8) == 0) return (64); /* * Attempt to handle self-extracting archives * by noting a PE header and searching forward * up to 128k for a 'MSCF' marker. */ if (p[0] == 'M' && p[1] == 'Z') { offset = 0; window = 4096; while (offset < (1024 * 128)) { const char *h = __archive_read_ahead(a, offset + window, &bytes_avail); if (h == NULL) { /* Remaining bytes are less than window. */ window >>= 1; if (window < 128) return (0); continue; } p = h + offset; while (p + 8 < h + bytes_avail) { int next; if ((next = find_cab_magic(p)) == 0) return (64); p += next; } offset = p - h; } } return (0); } static int archive_read_format_cab_options(struct archive_read *a, const char *key, const char *val) { struct cab *cab; int ret = ARCHIVE_FAILED; cab = (struct cab *)(a->format->data); if (strcmp(key, "hdrcharset") == 0) { if (val == NULL || val[0] == 0) archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "cab: hdrcharset option needs a character-set name"); else { cab->sconv = archive_string_conversion_from_charset( &a->archive, val, 0); if (cab->sconv != NULL) ret = ARCHIVE_OK; else ret = ARCHIVE_FATAL; } return (ret); } /* Note: The "warn" return is just to inform the options * supervisor that we didn't handle it. It will generate * a suitable error if no one used this option. */ return (ARCHIVE_WARN); } static int cab_skip_sfx(struct archive_read *a) { const char *p, *q; size_t skip; ssize_t bytes, window; window = 4096; for (;;) { const char *h = __archive_read_ahead(a, window, &bytes); if (h == NULL) { /* Remaining size are less than window. */ window >>= 1; if (window < 128) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Couldn't find out CAB header"); return (ARCHIVE_FATAL); } continue; } p = h; q = p + bytes; /* * Scan ahead until we find something that looks * like the cab header. */ while (p + 8 < q) { int next; if ((next = find_cab_magic(p)) == 0) { skip = p - h; __archive_read_consume(a, skip); return (ARCHIVE_OK); } p += next; } skip = p - h; __archive_read_consume(a, skip); } } static int truncated_error(struct archive_read *a) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated CAB header"); return (ARCHIVE_FATAL); } static ssize_t cab_strnlen(const unsigned char *p, size_t maxlen) { size_t i; for (i = 0; i <= maxlen; i++) { if (p[i] == 0) break; } if (i > maxlen) return (-1);/* invalid */ return ((ssize_t)i); } /* Read bytes as much as remaining. */ static const void * cab_read_ahead_remaining(struct archive_read *a, size_t min, ssize_t *avail) { const void *p; while (min > 0) { p = __archive_read_ahead(a, min, avail); if (p != NULL) return (p); min--; } return (NULL); } /* Convert a path separator '\' -> '/' */ static int cab_convert_path_separator_1(struct archive_string *fn, unsigned char attr) { size_t i; int mb; /* Easy check if we have '\' in multi-byte string. */ mb = 0; for (i = 0; i < archive_strlen(fn); i++) { if (fn->s[i] == '\\') { if (mb) { /* This may be second byte of multi-byte * character. */ break; } fn->s[i] = '/'; mb = 0; } else if ((fn->s[i] & 0x80) && !(attr & ATTR_NAME_IS_UTF)) mb = 1; else mb = 0; } if (i == archive_strlen(fn)) return (0); return (-1); } /* * Replace a character '\' with '/' in wide character. */ static void cab_convert_path_separator_2(struct cab *cab, struct archive_entry *entry) { const wchar_t *wp; size_t i; /* If a conversion to wide character failed, force the replacement. */ if ((wp = archive_entry_pathname_w(entry)) != NULL) { archive_wstrcpy(&(cab->ws), wp); for (i = 0; i < archive_strlen(&(cab->ws)); i++) { if (cab->ws.s[i] == L'\\') cab->ws.s[i] = L'/'; } archive_entry_copy_pathname_w(entry, cab->ws.s); } } /* * Read CFHEADER, CFFOLDER and CFFILE. */ static int cab_read_header(struct archive_read *a) { const unsigned char *p; struct cab *cab; struct cfheader *hd; size_t bytes, used; ssize_t len; int64_t skip; int err, i; int cur_folder, prev_folder; uint32_t offset32; a->archive.archive_format = ARCHIVE_FORMAT_CAB; if (a->archive.archive_format_name == NULL) a->archive.archive_format_name = "CAB"; if ((p = __archive_read_ahead(a, 42, NULL)) == NULL) return (truncated_error(a)); cab = (struct cab *)(a->format->data); if (cab->found_header == 0 && p[0] == 'M' && p[1] == 'Z') { - /* This is an executable? Must be self-extracting... */ + /* This is an executable? Must be self-extracting... */ err = cab_skip_sfx(a); if (err < ARCHIVE_WARN) return (err); - if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL) + /* Re-read header after processing the SFX. */ + if ((p = __archive_read_ahead(a, 42, NULL)) == NULL) return (truncated_error(a)); } cab->cab_offset = 0; /* * Read CFHEADER. */ hd = &cab->cfheader; if (p[CFHEADER_signature+0] != 'M' || p[CFHEADER_signature+1] != 'S' || p[CFHEADER_signature+2] != 'C' || p[CFHEADER_signature+3] != 'F') { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Couldn't find out CAB header"); return (ARCHIVE_FATAL); } hd->total_bytes = archive_le32dec(p + CFHEADER_cbCabinet); hd->files_offset = archive_le32dec(p + CFHEADER_coffFiles); hd->minor = p[CFHEADER_versionMinor]; hd->major = p[CFHEADER_versionMajor]; hd->folder_count = archive_le16dec(p + CFHEADER_cFolders); if (hd->folder_count == 0) goto invalid; hd->file_count = archive_le16dec(p + CFHEADER_cFiles); if (hd->file_count == 0) goto invalid; hd->flags = archive_le16dec(p + CFHEADER_flags); hd->setid = archive_le16dec(p + CFHEADER_setID); hd->cabinet = archive_le16dec(p + CFHEADER_iCabinet); used = CFHEADER_iCabinet + 2; if (hd->flags & RESERVE_PRESENT) { uint16_t cfheader; cfheader = archive_le16dec(p + CFHEADER_cbCFHeader); if (cfheader > 60000U) goto invalid; hd->cffolder = p[CFHEADER_cbCFFolder]; hd->cfdata = p[CFHEADER_cbCFData]; used += 4;/* cbCFHeader, cbCFFolder and cbCFData */ used += cfheader;/* abReserve */ } else hd->cffolder = 0;/* Avoid compiling warning. */ if (hd->flags & PREV_CABINET) { /* How many bytes are used for szCabinetPrev. */ if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL) return (truncated_error(a)); if ((len = cab_strnlen(p + used, 255)) <= 0) goto invalid; used += len + 1; /* How many bytes are used for szDiskPrev. */ if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL) return (truncated_error(a)); if ((len = cab_strnlen(p + used, 255)) <= 0) goto invalid; used += len + 1; } if (hd->flags & NEXT_CABINET) { /* How many bytes are used for szCabinetNext. */ if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL) return (truncated_error(a)); if ((len = cab_strnlen(p + used, 255)) <= 0) goto invalid; used += len + 1; /* How many bytes are used for szDiskNext. */ if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL) return (truncated_error(a)); if ((len = cab_strnlen(p + used, 255)) <= 0) goto invalid; used += len + 1; } __archive_read_consume(a, used); cab->cab_offset += used; used = 0; /* * Read CFFOLDER. */ hd->folder_array = (struct cffolder *)calloc( hd->folder_count, sizeof(struct cffolder)); if (hd->folder_array == NULL) goto nomem; bytes = 8; if (hd->flags & RESERVE_PRESENT) bytes += hd->cffolder; bytes *= hd->folder_count; if ((p = __archive_read_ahead(a, bytes, NULL)) == NULL) return (truncated_error(a)); offset32 = 0; for (i = 0; i < hd->folder_count; i++) { struct cffolder *folder = &(hd->folder_array[i]); folder->cfdata_offset_in_cab = archive_le32dec(p + CFFOLDER_coffCabStart); folder->cfdata_count = archive_le16dec(p+CFFOLDER_cCFData); folder->comptype = archive_le16dec(p+CFFOLDER_typeCompress) & 0x0F; folder->compdata = archive_le16dec(p+CFFOLDER_typeCompress) >> 8; /* Get a compression name. */ if (folder->comptype < sizeof(compression_name) / sizeof(compression_name[0])) folder->compname = compression_name[folder->comptype]; else folder->compname = "UNKNOWN"; p += 8; used += 8; if (hd->flags & RESERVE_PRESENT) { p += hd->cffolder;/* abReserve */ used += hd->cffolder; } /* * Sanity check if each data is acceptable. */ if (offset32 >= folder->cfdata_offset_in_cab) goto invalid; offset32 = folder->cfdata_offset_in_cab; /* Set a request to initialize zlib for the CFDATA of * this folder. */ folder->decompress_init = 0; } __archive_read_consume(a, used); cab->cab_offset += used; /* * Read CFFILE. */ /* Seek read pointer to the offset of CFFILE if needed. */ skip = (int64_t)hd->files_offset - cab->cab_offset; if (skip < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Invalid offset of CFFILE %jd < %jd", (intmax_t)hd->files_offset, (intmax_t)cab->cab_offset); return (ARCHIVE_FATAL); } if (skip) { __archive_read_consume(a, skip); cab->cab_offset += skip; } /* Allocate memory for CFDATA */ hd->file_array = (struct cffile *)calloc( hd->file_count, sizeof(struct cffile)); if (hd->file_array == NULL) goto nomem; prev_folder = -1; for (i = 0; i < hd->file_count; i++) { struct cffile *file = &(hd->file_array[i]); ssize_t avail; if ((p = __archive_read_ahead(a, 16, NULL)) == NULL) return (truncated_error(a)); file->uncompressed_size = archive_le32dec(p + CFFILE_cbFile); file->offset = archive_le32dec(p + CFFILE_uoffFolderStart); file->folder = archive_le16dec(p + CFFILE_iFolder); file->mtime = cab_dos_time(p + CFFILE_date_time); file->attr = (uint8_t)archive_le16dec(p + CFFILE_attribs); __archive_read_consume(a, 16); cab->cab_offset += 16; if ((p = cab_read_ahead_remaining(a, 256, &avail)) == NULL) return (truncated_error(a)); if ((len = cab_strnlen(p, avail-1)) <= 0) goto invalid; /* Copy a pathname. */ archive_string_init(&(file->pathname)); archive_strncpy(&(file->pathname), p, len); __archive_read_consume(a, len + 1); cab->cab_offset += len + 1; /* * Sanity check if each data is acceptable. */ if (file->uncompressed_size > 0x7FFF8000) goto invalid;/* Too large */ if ((int64_t)file->offset + (int64_t)file->uncompressed_size > ARCHIVE_LITERAL_LL(0x7FFF8000)) goto invalid;/* Too large */ switch (file->folder) { case iFoldCONTINUED_TO_NEXT: /* This must be last file in a folder. */ if (i != hd->file_count -1) goto invalid; cur_folder = hd->folder_count -1; break; case iFoldCONTINUED_PREV_AND_NEXT: /* This must be only one file in a folder. */ if (hd->file_count != 1) goto invalid; /* FALL THROUGH */ case iFoldCONTINUED_FROM_PREV: /* This must be first file in a folder. */ if (i != 0) goto invalid; prev_folder = cur_folder = 0; offset32 = file->offset; break; default: if (file->folder >= hd->folder_count) goto invalid; cur_folder = file->folder; break; } /* Dot not back track. */ if (cur_folder < prev_folder) goto invalid; if (cur_folder != prev_folder) offset32 = 0; prev_folder = cur_folder; /* Make sure there are not any blanks from last file * contents. */ if (offset32 != file->offset) goto invalid; offset32 += file->uncompressed_size; /* CFDATA is available for file contents. */ if (file->uncompressed_size > 0 && hd->folder_array[cur_folder].cfdata_count == 0) goto invalid; } if (hd->cabinet != 0 || hd->flags & (PREV_CABINET | NEXT_CABINET)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Multivolume cabinet file is unsupported"); return (ARCHIVE_WARN); } return (ARCHIVE_OK); invalid: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid CAB header"); return (ARCHIVE_FATAL); nomem: archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for CAB data"); return (ARCHIVE_FATAL); } static int archive_read_format_cab_read_header(struct archive_read *a, struct archive_entry *entry) { struct cab *cab; struct cfheader *hd; struct cffolder *prev_folder; struct cffile *file; struct archive_string_conv *sconv; int err = ARCHIVE_OK, r; cab = (struct cab *)(a->format->data); if (cab->found_header == 0) { err = cab_read_header(a); if (err < ARCHIVE_WARN) return (err); /* We've found the header. */ cab->found_header = 1; } hd = &cab->cfheader; if (hd->file_index >= hd->file_count) { cab->end_of_archive = 1; return (ARCHIVE_EOF); } file = &hd->file_array[hd->file_index++]; cab->end_of_entry = 0; cab->end_of_entry_cleanup = 0; cab->entry_compressed_bytes_read = 0; cab->entry_uncompressed_bytes_read = 0; cab->entry_unconsumed = 0; cab->entry_cffile = file; /* * Choose a proper folder. */ prev_folder = cab->entry_cffolder; switch (file->folder) { case iFoldCONTINUED_FROM_PREV: case iFoldCONTINUED_PREV_AND_NEXT: cab->entry_cffolder = &hd->folder_array[0]; break; case iFoldCONTINUED_TO_NEXT: cab->entry_cffolder = &hd->folder_array[hd->folder_count-1]; break; default: cab->entry_cffolder = &hd->folder_array[file->folder]; break; } /* If a cffolder of this file is changed, reset a cfdata to read * file contents from next cfdata. */ if (prev_folder != cab->entry_cffolder) cab->entry_cfdata = NULL; /* If a pathname is UTF-8, prepare a string conversion object * for UTF-8 and use it. */ if (file->attr & ATTR_NAME_IS_UTF) { if (cab->sconv_utf8 == NULL) { cab->sconv_utf8 = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (cab->sconv_utf8 == NULL) return (ARCHIVE_FATAL); } sconv = cab->sconv_utf8; } else if (cab->sconv != NULL) { /* Choose the conversion specified by the option. */ sconv = cab->sconv; } else { /* Choose the default conversion. */ if (!cab->init_default_conversion) { cab->sconv_default = archive_string_default_conversion_for_read( &(a->archive)); cab->init_default_conversion = 1; } sconv = cab->sconv_default; } /* * Set a default value and common data */ r = cab_convert_path_separator_1(&(file->pathname), file->attr); if (archive_entry_copy_pathname_l(entry, file->pathname.s, archive_strlen(&(file->pathname)), sconv) != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Pathname"); return (ARCHIVE_FATAL); } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Pathname cannot be converted " "from %s to current locale.", archive_string_conversion_charset_name(sconv)); err = ARCHIVE_WARN; } if (r < 0) { /* Convert a path separator '\' -> '/' */ cab_convert_path_separator_2(cab, entry); } archive_entry_set_size(entry, file->uncompressed_size); if (file->attr & ATTR_RDONLY) archive_entry_set_mode(entry, AE_IFREG | 0555); else archive_entry_set_mode(entry, AE_IFREG | 0666); archive_entry_set_mtime(entry, file->mtime, 0); cab->entry_bytes_remaining = file->uncompressed_size; cab->entry_offset = 0; /* We don't need compress data. */ if (file->uncompressed_size == 0) cab->end_of_entry_cleanup = cab->end_of_entry = 1; /* Set up a more descriptive format name. */ sprintf(cab->format_name, "CAB %d.%d (%s)", hd->major, hd->minor, cab->entry_cffolder->compname); a->archive.archive_format_name = cab->format_name; return (err); } static int archive_read_format_cab_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct cab *cab = (struct cab *)(a->format->data); int r; switch (cab->entry_cffile->folder) { case iFoldCONTINUED_FROM_PREV: case iFoldCONTINUED_TO_NEXT: case iFoldCONTINUED_PREV_AND_NEXT: *buff = NULL; *size = 0; *offset = 0; archive_clear_error(&a->archive); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Cannot restore this file split in multivolume."); return (ARCHIVE_FAILED); default: break; } if (cab->read_data_invoked == 0) { if (cab->bytes_skipped) { if (cab->entry_cfdata == NULL) { r = cab_next_cfdata(a); if (r < 0) return (r); } if (cab_consume_cfdata(a, cab->bytes_skipped) < 0) return (ARCHIVE_FATAL); cab->bytes_skipped = 0; } cab->read_data_invoked = 1; } if (cab->entry_unconsumed) { /* Consume as much as the compressor actually used. */ r = (int)cab_consume_cfdata(a, cab->entry_unconsumed); cab->entry_unconsumed = 0; if (r < 0) return (r); } if (cab->end_of_archive || cab->end_of_entry) { if (!cab->end_of_entry_cleanup) { /* End-of-entry cleanup done. */ cab->end_of_entry_cleanup = 1; } *offset = cab->entry_offset; *size = 0; *buff = NULL; return (ARCHIVE_EOF); } return (cab_read_data(a, buff, size, offset)); } static uint32_t cab_checksum_cfdata_4(const void *p, size_t bytes, uint32_t seed) { const unsigned char *b; unsigned u32num; uint32_t sum; u32num = (unsigned)bytes / 4; sum = seed; b = p; for (;u32num > 0; --u32num) { sum ^= archive_le32dec(b); b += 4; } return (sum); } static uint32_t cab_checksum_cfdata(const void *p, size_t bytes, uint32_t seed) { const unsigned char *b; uint32_t sum; uint32_t t; sum = cab_checksum_cfdata_4(p, bytes, seed); b = p; b += bytes & ~3; t = 0; switch (bytes & 3) { case 3: t |= ((uint32_t)(*b++)) << 16; /* FALL THROUGH */ case 2: t |= ((uint32_t)(*b++)) << 8; /* FALL THROUGH */ case 1: t |= *b; /* FALL THROUGH */ default: break; } sum ^= t; return (sum); } static void cab_checksum_update(struct archive_read *a, size_t bytes) { struct cab *cab = (struct cab *)(a->format->data); struct cfdata *cfdata = cab->entry_cfdata; const unsigned char *p; size_t sumbytes; if (cfdata->sum == 0 || cfdata->sum_ptr == NULL) return; /* * Calculate the sum of this CFDATA. * Make sure CFDATA must be calculated in four bytes. */ p = cfdata->sum_ptr; sumbytes = bytes; if (cfdata->sum_extra_avail) { while (cfdata->sum_extra_avail < 4 && sumbytes > 0) { cfdata->sum_extra[ cfdata->sum_extra_avail++] = *p++; sumbytes--; } if (cfdata->sum_extra_avail == 4) { cfdata->sum_calculated = cab_checksum_cfdata_4( cfdata->sum_extra, 4, cfdata->sum_calculated); cfdata->sum_extra_avail = 0; } } if (sumbytes) { int odd = sumbytes & 3; if (sumbytes - odd > 0) cfdata->sum_calculated = cab_checksum_cfdata_4( p, sumbytes - odd, cfdata->sum_calculated); if (odd) memcpy(cfdata->sum_extra, p + sumbytes - odd, odd); cfdata->sum_extra_avail = odd; } cfdata->sum_ptr = NULL; } static int cab_checksum_finish(struct archive_read *a) { struct cab *cab = (struct cab *)(a->format->data); struct cfdata *cfdata = cab->entry_cfdata; int l; /* Do not need to compute a sum. */ if (cfdata->sum == 0) return (ARCHIVE_OK); /* * Calculate the sum of remaining CFDATA. */ if (cfdata->sum_extra_avail) { cfdata->sum_calculated = cab_checksum_cfdata(cfdata->sum_extra, cfdata->sum_extra_avail, cfdata->sum_calculated); cfdata->sum_extra_avail = 0; } l = 4; if (cab->cfheader.flags & RESERVE_PRESENT) l += cab->cfheader.cfdata; cfdata->sum_calculated = cab_checksum_cfdata( cfdata->memimage + CFDATA_cbData, l, cfdata->sum_calculated); if (cfdata->sum_calculated != cfdata->sum) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Checksum error CFDATA[%d] %x:%x in %d bytes", cab->entry_cffolder->cfdata_index -1, cfdata->sum, cfdata->sum_calculated, cfdata->compressed_size); return (ARCHIVE_FAILED); } return (ARCHIVE_OK); } /* * Read CFDATA if needed. */ static int cab_next_cfdata(struct archive_read *a) { struct cab *cab = (struct cab *)(a->format->data); struct cfdata *cfdata = cab->entry_cfdata; /* There are remaining bytes in current CFDATA, use it first. */ if (cfdata != NULL && cfdata->uncompressed_bytes_remaining > 0) return (ARCHIVE_OK); if (cfdata == NULL) { int64_t skip; cab->entry_cffolder->cfdata_index = 0; /* Seek read pointer to the offset of CFDATA if needed. */ skip = cab->entry_cffolder->cfdata_offset_in_cab - cab->cab_offset; if (skip < 0) { int folder_index; switch (cab->entry_cffile->folder) { case iFoldCONTINUED_FROM_PREV: case iFoldCONTINUED_PREV_AND_NEXT: folder_index = 0; break; case iFoldCONTINUED_TO_NEXT: folder_index = cab->cfheader.folder_count-1; break; default: folder_index = cab->entry_cffile->folder; break; } archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Invalid offset of CFDATA in folder(%d) %jd < %jd", folder_index, (intmax_t)cab->entry_cffolder->cfdata_offset_in_cab, (intmax_t)cab->cab_offset); return (ARCHIVE_FATAL); } if (skip > 0) { if (__archive_read_consume(a, skip) < 0) return (ARCHIVE_FATAL); cab->cab_offset = cab->entry_cffolder->cfdata_offset_in_cab; } } /* * Read a CFDATA. */ if (cab->entry_cffolder->cfdata_index < cab->entry_cffolder->cfdata_count) { const unsigned char *p; int l; cfdata = &(cab->entry_cffolder->cfdata); cab->entry_cffolder->cfdata_index++; cab->entry_cfdata = cfdata; cfdata->sum_calculated = 0; cfdata->sum_extra_avail = 0; cfdata->sum_ptr = NULL; l = 8; if (cab->cfheader.flags & RESERVE_PRESENT) l += cab->cfheader.cfdata; if ((p = __archive_read_ahead(a, l, NULL)) == NULL) return (truncated_error(a)); cfdata->sum = archive_le32dec(p + CFDATA_csum); cfdata->compressed_size = archive_le16dec(p + CFDATA_cbData); cfdata->compressed_bytes_remaining = cfdata->compressed_size; cfdata->uncompressed_size = archive_le16dec(p + CFDATA_cbUncomp); cfdata->uncompressed_bytes_remaining = cfdata->uncompressed_size; cfdata->uncompressed_avail = 0; cfdata->read_offset = 0; cfdata->unconsumed = 0; /* * Sanity check if data size is acceptable. */ if (cfdata->compressed_size == 0 || cfdata->compressed_size > (0x8000+6144)) goto invalid; if (cfdata->uncompressed_size > 0x8000) goto invalid; if (cfdata->uncompressed_size == 0) { switch (cab->entry_cffile->folder) { case iFoldCONTINUED_PREV_AND_NEXT: case iFoldCONTINUED_TO_NEXT: break; case iFoldCONTINUED_FROM_PREV: default: goto invalid; } } /* If CFDATA is not last in a folder, an uncompressed * size must be 0x8000(32KBi) */ if ((cab->entry_cffolder->cfdata_index < cab->entry_cffolder->cfdata_count) && cfdata->uncompressed_size != 0x8000) goto invalid; /* A compressed data size and an uncompressed data size must * be the same in no compression mode. */ if (cab->entry_cffolder->comptype == COMPTYPE_NONE && cfdata->compressed_size != cfdata->uncompressed_size) goto invalid; /* * Save CFDATA image for sum check. */ if (cfdata->memimage_size < (size_t)l) { free(cfdata->memimage); cfdata->memimage = malloc(l); if (cfdata->memimage == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for CAB data"); return (ARCHIVE_FATAL); } cfdata->memimage_size = l; } memcpy(cfdata->memimage, p, l); /* Consume bytes as much as we used. */ __archive_read_consume(a, l); cab->cab_offset += l; } else if (cab->entry_cffolder->cfdata_count > 0) { /* Run out of all CFDATA in a folder. */ cfdata->compressed_size = 0; cfdata->uncompressed_size = 0; cfdata->compressed_bytes_remaining = 0; cfdata->uncompressed_bytes_remaining = 0; } else { /* Current folder does not have any CFDATA. */ cfdata = &(cab->entry_cffolder->cfdata); cab->entry_cfdata = cfdata; memset(cfdata, 0, sizeof(*cfdata)); } return (ARCHIVE_OK); invalid: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid CFDATA"); return (ARCHIVE_FATAL); } /* * Read ahead CFDATA. */ static const void * cab_read_ahead_cfdata(struct archive_read *a, ssize_t *avail) { struct cab *cab = (struct cab *)(a->format->data); int err; err = cab_next_cfdata(a); if (err < ARCHIVE_OK) { *avail = err; return (NULL); } switch (cab->entry_cffolder->comptype) { case COMPTYPE_NONE: return (cab_read_ahead_cfdata_none(a, avail)); case COMPTYPE_MSZIP: return (cab_read_ahead_cfdata_deflate(a, avail)); case COMPTYPE_LZX: return (cab_read_ahead_cfdata_lzx(a, avail)); default: /* Unsupported compression. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported CAB compression : %s", cab->entry_cffolder->compname); *avail = ARCHIVE_FAILED; return (NULL); } } /* * Read ahead CFDATA as uncompressed data. */ static const void * cab_read_ahead_cfdata_none(struct archive_read *a, ssize_t *avail) { struct cab *cab = (struct cab *)(a->format->data); struct cfdata *cfdata; const void *d; cfdata = cab->entry_cfdata; /* * Note: '1' here is a performance optimization. * Recall that the decompression layer returns a count of * available bytes; asking for more than that forces the * decompressor to combine reads by copying data. */ d = __archive_read_ahead(a, 1, avail); if (*avail <= 0) { *avail = truncated_error(a); return (NULL); } if (*avail > cfdata->uncompressed_bytes_remaining) *avail = cfdata->uncompressed_bytes_remaining; cfdata->uncompressed_avail = cfdata->uncompressed_size; cfdata->unconsumed = *avail; cfdata->sum_ptr = d; return (d); } /* * Read ahead CFDATA as deflate data. */ #ifdef HAVE_ZLIB_H static const void * cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail) { struct cab *cab = (struct cab *)(a->format->data); struct cfdata *cfdata; const void *d; int r, mszip; uint16_t uavail; char eod = 0; cfdata = cab->entry_cfdata; /* If the buffer hasn't been allocated, allocate it now. */ if (cab->uncompressed_buffer == NULL) { cab->uncompressed_buffer_size = 0x8000; cab->uncompressed_buffer = (unsigned char *)malloc(cab->uncompressed_buffer_size); if (cab->uncompressed_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for CAB reader"); *avail = ARCHIVE_FATAL; return (NULL); } } uavail = cfdata->uncompressed_avail; if (uavail == cfdata->uncompressed_size) { d = cab->uncompressed_buffer + cfdata->read_offset; *avail = uavail - cfdata->read_offset; return (d); } if (!cab->entry_cffolder->decompress_init) { cab->stream.next_in = NULL; cab->stream.avail_in = 0; cab->stream.total_in = 0; cab->stream.next_out = NULL; cab->stream.avail_out = 0; cab->stream.total_out = 0; if (cab->stream_valid) r = inflateReset(&cab->stream); else r = inflateInit2(&cab->stream, -15 /* Don't check for zlib header */); if (r != Z_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't initialize deflate decompression."); *avail = ARCHIVE_FATAL; return (NULL); } /* Stream structure has been set up. */ cab->stream_valid = 1; /* We've initialized decompression for this stream. */ cab->entry_cffolder->decompress_init = 1; } if (cfdata->compressed_bytes_remaining == cfdata->compressed_size) mszip = 2; else mszip = 0; eod = 0; cab->stream.total_out = uavail; /* * We always uncompress all data in current CFDATA. */ while (!eod && cab->stream.total_out < cfdata->uncompressed_size) { ssize_t bytes_avail; cab->stream.next_out = cab->uncompressed_buffer + cab->stream.total_out; cab->stream.avail_out = cfdata->uncompressed_size - cab->stream.total_out; d = __archive_read_ahead(a, 1, &bytes_avail); if (bytes_avail <= 0) { *avail = truncated_error(a); return (NULL); } if (bytes_avail > cfdata->compressed_bytes_remaining) bytes_avail = cfdata->compressed_bytes_remaining; /* * A bug in zlib.h: stream.next_in should be marked 'const' * but isn't (the library never alters data through the * next_in pointer, only reads it). The result: this ugly * cast to remove 'const'. */ cab->stream.next_in = (Bytef *)(uintptr_t)d; cab->stream.avail_in = (uInt)bytes_avail; cab->stream.total_in = 0; /* Cut out a tow-byte MSZIP signature(0x43, 0x4b). */ if (mszip > 0) { if (bytes_avail <= mszip) { if (mszip == 2) { if (cab->stream.next_in[0] != 0x43) goto nomszip; if (bytes_avail > 1 && cab->stream.next_in[1] != 0x4b) goto nomszip; } else if (cab->stream.next_in[0] != 0x4b) goto nomszip; cfdata->unconsumed = bytes_avail; cfdata->sum_ptr = d; if (cab_minimum_consume_cfdata( a, cfdata->unconsumed) < 0) { *avail = ARCHIVE_FATAL; return (NULL); } mszip -= (int)bytes_avail; continue; } if (mszip == 1 && cab->stream.next_in[0] != 0x4b) goto nomszip; else if (cab->stream.next_in[0] != 0x43 || cab->stream.next_in[1] != 0x4b) goto nomszip; cab->stream.next_in += mszip; cab->stream.avail_in -= mszip; cab->stream.total_in += mszip; mszip = 0; } r = inflate(&cab->stream, 0); switch (r) { case Z_OK: break; case Z_STREAM_END: eod = 1; break; default: goto zlibfailed; } cfdata->unconsumed = cab->stream.total_in; cfdata->sum_ptr = d; if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) { *avail = ARCHIVE_FATAL; return (NULL); } } uavail = (uint16_t)cab->stream.total_out; if (uavail < cfdata->uncompressed_size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Invalid uncompressed size (%d < %d)", uavail, cfdata->uncompressed_size); *avail = ARCHIVE_FATAL; return (NULL); } /* * Note: I suspect there is a bug in makecab.exe because, in rare * case, compressed bytes are still remaining regardless we have * gotten all uncompressed bytes, which size is recoded in CFDATA, * as much as we need, and we have to use the garbage so as to * correctly compute the sum of CFDATA accordingly. */ if (cfdata->compressed_bytes_remaining > 0) { ssize_t bytes_avail; d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining, &bytes_avail); if (bytes_avail <= 0) { *avail = truncated_error(a); return (NULL); } cfdata->unconsumed = cfdata->compressed_bytes_remaining; cfdata->sum_ptr = d; if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) { *avail = ARCHIVE_FATAL; return (NULL); } } /* * Set dictionary data for decompressing of next CFDATA, which * in the same folder. This is why we always do decompress CFDATA * even if beginning CFDATA or some of CFDATA are not used in * skipping file data. */ if (cab->entry_cffolder->cfdata_index < cab->entry_cffolder->cfdata_count) { r = inflateReset(&cab->stream); if (r != Z_OK) goto zlibfailed; r = inflateSetDictionary(&cab->stream, cab->uncompressed_buffer, cfdata->uncompressed_size); if (r != Z_OK) goto zlibfailed; } d = cab->uncompressed_buffer + cfdata->read_offset; *avail = uavail - cfdata->read_offset; cfdata->uncompressed_avail = uavail; return (d); zlibfailed: switch (r) { case Z_MEM_ERROR: archive_set_error(&a->archive, ENOMEM, "Out of memory for deflate decompression"); break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Deflate decompression failed (%d)", r); break; } *avail = ARCHIVE_FATAL; return (NULL); nomszip: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "CFDATA incorrect(no MSZIP signature)"); *avail = ARCHIVE_FATAL; return (NULL); } #else /* HAVE_ZLIB_H */ static const void * cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail) { *avail = ARCHIVE_FATAL; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "libarchive compiled without deflate support (no libz)"); return (NULL); } #endif /* HAVE_ZLIB_H */ static const void * cab_read_ahead_cfdata_lzx(struct archive_read *a, ssize_t *avail) { struct cab *cab = (struct cab *)(a->format->data); struct cfdata *cfdata; const void *d; int r; uint16_t uavail; cfdata = cab->entry_cfdata; /* If the buffer hasn't been allocated, allocate it now. */ if (cab->uncompressed_buffer == NULL) { cab->uncompressed_buffer_size = 0x8000; cab->uncompressed_buffer = (unsigned char *)malloc(cab->uncompressed_buffer_size); if (cab->uncompressed_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for CAB reader"); *avail = ARCHIVE_FATAL; return (NULL); } } uavail = cfdata->uncompressed_avail; if (uavail == cfdata->uncompressed_size) { d = cab->uncompressed_buffer + cfdata->read_offset; *avail = uavail - cfdata->read_offset; return (d); } if (!cab->entry_cffolder->decompress_init) { r = lzx_decode_init(&cab->xstrm, cab->entry_cffolder->compdata); if (r != ARCHIVE_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't initialize LZX decompression."); *avail = ARCHIVE_FATAL; return (NULL); } /* We've initialized decompression for this stream. */ cab->entry_cffolder->decompress_init = 1; } /* Clean up remaining bits of previous CFDATA. */ lzx_cleanup_bitstream(&cab->xstrm); cab->xstrm.total_out = uavail; while (cab->xstrm.total_out < cfdata->uncompressed_size) { ssize_t bytes_avail; cab->xstrm.next_out = cab->uncompressed_buffer + cab->xstrm.total_out; cab->xstrm.avail_out = cfdata->uncompressed_size - cab->xstrm.total_out; d = __archive_read_ahead(a, 1, &bytes_avail); if (bytes_avail <= 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated CAB file data"); *avail = ARCHIVE_FATAL; return (NULL); } if (bytes_avail > cfdata->compressed_bytes_remaining) bytes_avail = cfdata->compressed_bytes_remaining; cab->xstrm.next_in = d; cab->xstrm.avail_in = bytes_avail; cab->xstrm.total_in = 0; r = lzx_decode(&cab->xstrm, cfdata->compressed_bytes_remaining == bytes_avail); switch (r) { case ARCHIVE_OK: case ARCHIVE_EOF: break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "LZX decompression failed (%d)", r); *avail = ARCHIVE_FATAL; return (NULL); } cfdata->unconsumed = cab->xstrm.total_in; cfdata->sum_ptr = d; if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) { *avail = ARCHIVE_FATAL; return (NULL); } } uavail = (uint16_t)cab->xstrm.total_out; /* * Make sure a read pointer advances to next CFDATA. */ if (cfdata->compressed_bytes_remaining > 0) { ssize_t bytes_avail; d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining, &bytes_avail); if (bytes_avail <= 0) { *avail = truncated_error(a); return (NULL); } cfdata->unconsumed = cfdata->compressed_bytes_remaining; cfdata->sum_ptr = d; if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) { *avail = ARCHIVE_FATAL; return (NULL); } } /* * Translation reversal of x86 proccessor CALL byte sequence(E8). */ lzx_translation(&cab->xstrm, cab->uncompressed_buffer, cfdata->uncompressed_size, (cab->entry_cffolder->cfdata_index-1) * 0x8000); d = cab->uncompressed_buffer + cfdata->read_offset; *avail = uavail - cfdata->read_offset; cfdata->uncompressed_avail = uavail; return (d); } /* * Consume CFDATA. * We always decompress CFDATA to consume CFDATA as much as we need * in uncompressed bytes because all CFDATA in a folder are related * so we do not skip any CFDATA without decompressing. * Note: If the folder of a CFFILE is iFoldCONTINUED_PREV_AND_NEXT or * iFoldCONTINUED_FROM_PREV, we won't decompress because a CFDATA for * the CFFILE is remaining bytes of previous Multivolume CAB file. */ static int64_t cab_consume_cfdata(struct archive_read *a, int64_t consumed_bytes) { struct cab *cab = (struct cab *)(a->format->data); struct cfdata *cfdata; int64_t cbytes, rbytes; int err; rbytes = cab_minimum_consume_cfdata(a, consumed_bytes); if (rbytes < 0) return (ARCHIVE_FATAL); cfdata = cab->entry_cfdata; while (rbytes > 0) { ssize_t avail; if (cfdata->compressed_size == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid CFDATA"); return (ARCHIVE_FATAL); } cbytes = cfdata->uncompressed_bytes_remaining; if (cbytes > rbytes) cbytes = rbytes; rbytes -= cbytes; if (cfdata->uncompressed_avail == 0 && (cab->entry_cffile->folder == iFoldCONTINUED_PREV_AND_NEXT || cab->entry_cffile->folder == iFoldCONTINUED_FROM_PREV)) { /* We have not read any data yet. */ if (cbytes == cfdata->uncompressed_bytes_remaining) { /* Skip whole current CFDATA. */ __archive_read_consume(a, cfdata->compressed_size); cab->cab_offset += cfdata->compressed_size; cfdata->compressed_bytes_remaining = 0; cfdata->uncompressed_bytes_remaining = 0; err = cab_next_cfdata(a); if (err < 0) return (err); cfdata = cab->entry_cfdata; if (cfdata->uncompressed_size == 0) { switch (cab->entry_cffile->folder) { case iFoldCONTINUED_PREV_AND_NEXT: case iFoldCONTINUED_TO_NEXT: case iFoldCONTINUED_FROM_PREV: rbytes = 0; break; default: break; } } continue; } cfdata->read_offset += (uint16_t)cbytes; cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes; break; } else if (cbytes == 0) { err = cab_next_cfdata(a); if (err < 0) return (err); cfdata = cab->entry_cfdata; if (cfdata->uncompressed_size == 0) { switch (cab->entry_cffile->folder) { case iFoldCONTINUED_PREV_AND_NEXT: case iFoldCONTINUED_TO_NEXT: case iFoldCONTINUED_FROM_PREV: return (ARCHIVE_FATAL); default: break; } } continue; } while (cbytes > 0) { (void)cab_read_ahead_cfdata(a, &avail); if (avail <= 0) return (ARCHIVE_FATAL); if (avail > cbytes) avail = (ssize_t)cbytes; if (cab_minimum_consume_cfdata(a, avail) < 0) return (ARCHIVE_FATAL); cbytes -= avail; } } return (consumed_bytes); } /* * Consume CFDATA as much as we have already gotten and * compute the sum of CFDATA. */ static int64_t cab_minimum_consume_cfdata(struct archive_read *a, int64_t consumed_bytes) { struct cab *cab = (struct cab *)(a->format->data); struct cfdata *cfdata; int64_t cbytes, rbytes; int err; cfdata = cab->entry_cfdata; rbytes = consumed_bytes; if (cab->entry_cffolder->comptype == COMPTYPE_NONE) { if (consumed_bytes < cfdata->unconsumed) cbytes = consumed_bytes; else cbytes = cfdata->unconsumed; rbytes -= cbytes; cfdata->read_offset += (uint16_t)cbytes; cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes; cfdata->unconsumed -= cbytes; } else { cbytes = cfdata->uncompressed_avail - cfdata->read_offset; if (cbytes > 0) { if (consumed_bytes < cbytes) cbytes = consumed_bytes; rbytes -= cbytes; cfdata->read_offset += (uint16_t)cbytes; cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes; } if (cfdata->unconsumed) { cbytes = cfdata->unconsumed; cfdata->unconsumed = 0; } else cbytes = 0; } if (cbytes) { /* Compute the sum. */ cab_checksum_update(a, (size_t)cbytes); /* Consume as much as the compressor actually used. */ __archive_read_consume(a, cbytes); cab->cab_offset += cbytes; cfdata->compressed_bytes_remaining -= (uint16_t)cbytes; if (cfdata->compressed_bytes_remaining == 0) { err = cab_checksum_finish(a); if (err < 0) return (err); } } return (rbytes); } /* * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets * cab->end_of_entry if it consumes all of the data. */ static int cab_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct cab *cab = (struct cab *)(a->format->data); ssize_t bytes_avail; if (cab->entry_bytes_remaining == 0) { *buff = NULL; *size = 0; *offset = cab->entry_offset; cab->end_of_entry = 1; return (ARCHIVE_OK); } *buff = cab_read_ahead_cfdata(a, &bytes_avail); if (bytes_avail <= 0) { *buff = NULL; *size = 0; *offset = 0; if (bytes_avail == 0 && cab->entry_cfdata->uncompressed_size == 0) { /* All of CFDATA in a folder has been handled. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid CFDATA"); return (ARCHIVE_FATAL); } else return ((int)bytes_avail); } if (bytes_avail > cab->entry_bytes_remaining) bytes_avail = (ssize_t)cab->entry_bytes_remaining; *size = bytes_avail; *offset = cab->entry_offset; cab->entry_offset += bytes_avail; cab->entry_bytes_remaining -= bytes_avail; if (cab->entry_bytes_remaining == 0) cab->end_of_entry = 1; cab->entry_unconsumed = bytes_avail; if (cab->entry_cffolder->comptype == COMPTYPE_NONE) { /* Don't consume more than current entry used. */ if (cab->entry_cfdata->unconsumed > cab->entry_unconsumed) cab->entry_cfdata->unconsumed = cab->entry_unconsumed; } return (ARCHIVE_OK); } static int archive_read_format_cab_read_data_skip(struct archive_read *a) { struct cab *cab; int64_t bytes_skipped; int r; cab = (struct cab *)(a->format->data); if (cab->end_of_archive) return (ARCHIVE_EOF); if (!cab->read_data_invoked) { cab->bytes_skipped += cab->entry_bytes_remaining; cab->entry_bytes_remaining = 0; /* This entry is finished and done. */ cab->end_of_entry_cleanup = cab->end_of_entry = 1; return (ARCHIVE_OK); } if (cab->entry_unconsumed) { /* Consume as much as the compressor actually used. */ r = (int)cab_consume_cfdata(a, cab->entry_unconsumed); cab->entry_unconsumed = 0; if (r < 0) return (r); } else if (cab->entry_cfdata == NULL) { r = cab_next_cfdata(a); if (r < 0) return (r); } /* if we've already read to end of data, we're done. */ if (cab->end_of_entry_cleanup) return (ARCHIVE_OK); /* * If the length is at the beginning, we can skip the * compressed data much more quickly. */ bytes_skipped = cab_consume_cfdata(a, cab->entry_bytes_remaining); if (bytes_skipped < 0) return (ARCHIVE_FATAL); /* If the compression type is none(uncompressed), we've already * consumed data as much as the current entry size. */ if (cab->entry_cffolder->comptype == COMPTYPE_NONE && cab->entry_cfdata != NULL) cab->entry_cfdata->unconsumed = 0; /* This entry is finished and done. */ cab->end_of_entry_cleanup = cab->end_of_entry = 1; return (ARCHIVE_OK); } static int archive_read_format_cab_cleanup(struct archive_read *a) { struct cab *cab = (struct cab *)(a->format->data); struct cfheader *hd = &cab->cfheader; int i; if (hd->folder_array != NULL) { for (i = 0; i < hd->folder_count; i++) free(hd->folder_array[i].cfdata.memimage); free(hd->folder_array); } if (hd->file_array != NULL) { for (i = 0; i < cab->cfheader.file_count; i++) archive_string_free(&(hd->file_array[i].pathname)); free(hd->file_array); } #ifdef HAVE_ZLIB_H if (cab->stream_valid) inflateEnd(&cab->stream); #endif lzx_decode_free(&cab->xstrm); archive_wstring_free(&cab->ws); free(cab->uncompressed_buffer); free(cab); (a->format->data) = NULL; return (ARCHIVE_OK); } /* Convert an MSDOS-style date/time into Unix-style time. */ static time_t cab_dos_time(const unsigned char *p) { int msTime, msDate; struct tm ts; msDate = archive_le16dec(p); msTime = archive_le16dec(p+2); memset(&ts, 0, sizeof(ts)); ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ ts.tm_mday = msDate & 0x1f; /* Day of month. */ ts.tm_hour = (msTime >> 11) & 0x1f; ts.tm_min = (msTime >> 5) & 0x3f; ts.tm_sec = (msTime << 1) & 0x3e; ts.tm_isdst = -1; return (mktime(&ts)); } /***************************************************************** * * LZX decompression code. * *****************************************************************/ /* * Initialize LZX decoder. * * Returns ARCHIVE_OK if initialization was successful. * Returns ARCHIVE_FAILED if w_bits has unsupported value. * Returns ARCHIVE_FATAL if initialization failed; memory allocation * error occurred. */ static int lzx_decode_init(struct lzx_stream *strm, int w_bits) { struct lzx_dec *ds; int slot, w_size, w_slot; int base, footer; int base_inc[18]; if (strm->ds == NULL) { strm->ds = calloc(1, sizeof(*strm->ds)); if (strm->ds == NULL) return (ARCHIVE_FATAL); } ds = strm->ds; ds->error = ARCHIVE_FAILED; /* Allow bits from 15(32KBi) up to 21(2MBi) */ if (w_bits < SLOT_BASE || w_bits > SLOT_MAX) return (ARCHIVE_FAILED); ds->error = ARCHIVE_FATAL; /* * Alloc window */ w_size = ds->w_size; w_slot = slots[w_bits - SLOT_BASE]; ds->w_size = 1U << w_bits; ds->w_mask = ds->w_size -1; if (ds->w_buff == NULL || w_size != ds->w_size) { free(ds->w_buff); ds->w_buff = malloc(ds->w_size); if (ds->w_buff == NULL) return (ARCHIVE_FATAL); free(ds->pos_tbl); ds->pos_tbl = malloc(sizeof(ds->pos_tbl[0]) * w_slot); if (ds->pos_tbl == NULL) return (ARCHIVE_FATAL); lzx_huffman_free(&(ds->mt)); } for (footer = 0; footer < 18; footer++) base_inc[footer] = 1 << footer; base = footer = 0; for (slot = 0; slot < w_slot; slot++) { int n; if (footer == 0) base = slot; else base += base_inc[footer]; if (footer < 17) { footer = -2; for (n = base; n; n >>= 1) footer++; if (footer <= 0) footer = 0; } ds->pos_tbl[slot].base = base; ds->pos_tbl[slot].footer_bits = footer; } ds->w_pos = 0; ds->state = 0; ds->br.cache_buffer = 0; ds->br.cache_avail = 0; ds->r0 = ds->r1 = ds->r2 = 1; /* Initialize aligned offset tree. */ if (lzx_huffman_init(&(ds->at), 8, 8) != ARCHIVE_OK) return (ARCHIVE_FATAL); /* Initialize pre-tree. */ if (lzx_huffman_init(&(ds->pt), 20, 10) != ARCHIVE_OK) return (ARCHIVE_FATAL); /* Initialize Main tree. */ if (lzx_huffman_init(&(ds->mt), 256+(w_slot<<3), 16) != ARCHIVE_OK) return (ARCHIVE_FATAL); /* Initialize Length tree. */ if (lzx_huffman_init(&(ds->lt), 249, 16) != ARCHIVE_OK) return (ARCHIVE_FATAL); ds->error = 0; return (ARCHIVE_OK); } /* * Release LZX decoder. */ static void lzx_decode_free(struct lzx_stream *strm) { if (strm->ds == NULL) return; free(strm->ds->w_buff); free(strm->ds->pos_tbl); lzx_huffman_free(&(strm->ds->at)); lzx_huffman_free(&(strm->ds->pt)); lzx_huffman_free(&(strm->ds->mt)); lzx_huffman_free(&(strm->ds->lt)); free(strm->ds); strm->ds = NULL; } /* * E8 Call Translation reversal. */ static void lzx_translation(struct lzx_stream *strm, void *p, size_t size, uint32_t offset) { struct lzx_dec *ds = strm->ds; unsigned char *b, *end; if (!ds->translation || size <= 10) return; b = p; end = b + size - 10; while (b < end && (b = memchr(b, 0xE8, end - b)) != NULL) { size_t i = b - (unsigned char *)p; int32_t cp, displacement, value; cp = (int32_t)(offset + (uint32_t)i); value = archive_le32dec(&b[1]); if (value >= -cp && value < (int32_t)ds->translation_size) { if (value >= 0) displacement = value - cp; else displacement = value + ds->translation_size; archive_le32enc(&b[1], (uint32_t)displacement); } b += 5; } } /* * Bit stream reader. */ /* Check that the cache buffer has enough bits. */ #define lzx_br_has(br, n) ((br)->cache_avail >= n) /* Get compressed data by bit. */ #define lzx_br_bits(br, n) \ (((uint32_t)((br)->cache_buffer >> \ ((br)->cache_avail - (n)))) & cache_masks[n]) #define lzx_br_bits_forced(br, n) \ (((uint32_t)((br)->cache_buffer << \ ((n) - (br)->cache_avail))) & cache_masks[n]) /* Read ahead to make sure the cache buffer has enough compressed data we * will use. * True : completed, there is enough data in the cache buffer. * False : we met that strm->next_in is empty, we have to get following * bytes. */ #define lzx_br_read_ahead_0(strm, br, n) \ (lzx_br_has((br), (n)) || lzx_br_fillup(strm, br)) /* True : the cache buffer has some bits as much as we need. * False : there are no enough bits in the cache buffer to be used, * we have to get following bytes if we could. */ #define lzx_br_read_ahead(strm, br, n) \ (lzx_br_read_ahead_0((strm), (br), (n)) || lzx_br_has((br), (n))) /* Notify how many bits we consumed. */ #define lzx_br_consume(br, n) ((br)->cache_avail -= (n)) #define lzx_br_consume_unaligned_bits(br) ((br)->cache_avail &= ~0x0f) #define lzx_br_is_unaligned(br) ((br)->cache_avail & 0x0f) static const uint32_t cache_masks[] = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; /* * Shift away used bits in the cache data and fill it up with following bits. * Call this when cache buffer does not have enough bits you need. * * Returns 1 if the cache buffer is full. * Returns 0 if the cache buffer is not full; input buffer is empty. */ static int lzx_br_fillup(struct lzx_stream *strm, struct lzx_br *br) { /* * x86 proccessor family can read misaligned data without an access error. */ int n = CACHE_BITS - br->cache_avail; for (;;) { switch (n >> 4) { case 4: if (strm->avail_in >= 8) { br->cache_buffer = ((uint64_t)strm->next_in[1]) << 56 | ((uint64_t)strm->next_in[0]) << 48 | ((uint64_t)strm->next_in[3]) << 40 | ((uint64_t)strm->next_in[2]) << 32 | ((uint32_t)strm->next_in[5]) << 24 | ((uint32_t)strm->next_in[4]) << 16 | ((uint32_t)strm->next_in[7]) << 8 | (uint32_t)strm->next_in[6]; strm->next_in += 8; strm->avail_in -= 8; br->cache_avail += 8 * 8; return (1); } break; case 3: if (strm->avail_in >= 6) { br->cache_buffer = (br->cache_buffer << 48) | ((uint64_t)strm->next_in[1]) << 40 | ((uint64_t)strm->next_in[0]) << 32 | ((uint32_t)strm->next_in[3]) << 24 | ((uint32_t)strm->next_in[2]) << 16 | ((uint32_t)strm->next_in[5]) << 8 | (uint32_t)strm->next_in[4]; strm->next_in += 6; strm->avail_in -= 6; br->cache_avail += 6 * 8; return (1); } break; case 0: /* We have enough compressed data in * the cache buffer.*/ return (1); default: break; } if (strm->avail_in < 2) { /* There is not enough compressed data to * fill up the cache buffer. */ if (strm->avail_in == 1) { br->odd = *strm->next_in++; strm->avail_in--; br->have_odd = 1; } return (0); } br->cache_buffer = (br->cache_buffer << 16) | archive_le16dec(strm->next_in); strm->next_in += 2; strm->avail_in -= 2; br->cache_avail += 16; n -= 16; } } static void lzx_br_fixup(struct lzx_stream *strm, struct lzx_br *br) { int n = CACHE_BITS - br->cache_avail; if (br->have_odd && n >= 16 && strm->avail_in > 0) { br->cache_buffer = (br->cache_buffer << 16) | ((uint16_t)(*strm->next_in)) << 8 | br->odd; strm->next_in++; strm->avail_in--; br->cache_avail += 16; br->have_odd = 0; } } static void lzx_cleanup_bitstream(struct lzx_stream *strm) { strm->ds->br.cache_avail = 0; strm->ds->br.have_odd = 0; } /* * Decode LZX. * * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty. * Please set available buffer and call this function again. * 2. Returns ARCHIVE_EOF if decompression has been completed. * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data * is broken or you do not set 'last' flag properly. */ #define ST_RD_TRANSLATION 0 #define ST_RD_TRANSLATION_SIZE 1 #define ST_RD_BLOCK_TYPE 2 #define ST_RD_BLOCK_SIZE 3 #define ST_RD_ALIGNMENT 4 #define ST_RD_R0 5 #define ST_RD_R1 6 #define ST_RD_R2 7 #define ST_COPY_UNCOMP1 8 #define ST_COPY_UNCOMP2 9 #define ST_RD_ALIGNED_OFFSET 10 #define ST_RD_VERBATIM 11 #define ST_RD_PRE_MAIN_TREE_256 12 #define ST_MAIN_TREE_256 13 #define ST_RD_PRE_MAIN_TREE_REM 14 #define ST_MAIN_TREE_REM 15 #define ST_RD_PRE_LENGTH_TREE 16 #define ST_LENGTH_TREE 17 #define ST_MAIN 18 #define ST_LENGTH 19 #define ST_OFFSET 20 #define ST_REAL_POS 21 #define ST_COPY 22 static int lzx_decode(struct lzx_stream *strm, int last) { struct lzx_dec *ds = strm->ds; int64_t avail_in; int r; if (ds->error) return (ds->error); avail_in = strm->avail_in; lzx_br_fixup(strm, &(ds->br)); do { if (ds->state < ST_MAIN) r = lzx_read_blocks(strm, last); else { int64_t bytes_written = strm->avail_out; r = lzx_decode_blocks(strm, last); bytes_written -= strm->avail_out; strm->next_out += bytes_written; strm->total_out += bytes_written; } } while (r == 100); strm->total_in += avail_in - strm->avail_in; return (r); } static int lzx_read_blocks(struct lzx_stream *strm, int last) { struct lzx_dec *ds = strm->ds; struct lzx_br *br = &(ds->br); int i, r; for (;;) { switch (ds->state) { case ST_RD_TRANSLATION: if (!lzx_br_read_ahead(strm, br, 1)) { ds->state = ST_RD_TRANSLATION; if (last) goto failed; return (ARCHIVE_OK); } ds->translation = lzx_br_bits(br, 1); lzx_br_consume(br, 1); /* FALL THROUGH */ case ST_RD_TRANSLATION_SIZE: if (ds->translation) { if (!lzx_br_read_ahead(strm, br, 32)) { ds->state = ST_RD_TRANSLATION_SIZE; if (last) goto failed; return (ARCHIVE_OK); } ds->translation_size = lzx_br_bits(br, 16); lzx_br_consume(br, 16); ds->translation_size <<= 16; ds->translation_size |= lzx_br_bits(br, 16); lzx_br_consume(br, 16); } /* FALL THROUGH */ case ST_RD_BLOCK_TYPE: if (!lzx_br_read_ahead(strm, br, 3)) { ds->state = ST_RD_BLOCK_TYPE; if (last) goto failed; return (ARCHIVE_OK); } ds->block_type = lzx_br_bits(br, 3); lzx_br_consume(br, 3); /* Check a block type. */ switch (ds->block_type) { case VERBATIM_BLOCK: case ALIGNED_OFFSET_BLOCK: case UNCOMPRESSED_BLOCK: break; default: goto failed;/* Invalid */ } /* FALL THROUGH */ case ST_RD_BLOCK_SIZE: if (!lzx_br_read_ahead(strm, br, 24)) { ds->state = ST_RD_BLOCK_SIZE; if (last) goto failed; return (ARCHIVE_OK); } ds->block_size = lzx_br_bits(br, 8); lzx_br_consume(br, 8); ds->block_size <<= 16; ds->block_size |= lzx_br_bits(br, 16); lzx_br_consume(br, 16); if (ds->block_size == 0) goto failed; ds->block_bytes_avail = ds->block_size; if (ds->block_type != UNCOMPRESSED_BLOCK) { if (ds->block_type == VERBATIM_BLOCK) ds->state = ST_RD_VERBATIM; else ds->state = ST_RD_ALIGNED_OFFSET; break; } /* FALL THROUGH */ case ST_RD_ALIGNMENT: /* * Handle an Uncompressed Block. */ /* Skip padding to align following field on * 16-bit boundary. */ if (lzx_br_is_unaligned(br)) lzx_br_consume_unaligned_bits(br); else { if (lzx_br_read_ahead(strm, br, 16)) lzx_br_consume(br, 16); else { ds->state = ST_RD_ALIGNMENT; if (last) goto failed; return (ARCHIVE_OK); } } /* Preparation to read repeated offsets R0,R1 and R2. */ ds->rbytes_avail = 0; ds->state = ST_RD_R0; /* FALL THROUGH */ case ST_RD_R0: case ST_RD_R1: case ST_RD_R2: do { uint16_t u16; /* Drain bits in the cache buffer of * bit-stream. */ if (lzx_br_has(br, 32)) { u16 = lzx_br_bits(br, 16); lzx_br_consume(br, 16); archive_le16enc(ds->rbytes, u16); u16 = lzx_br_bits(br, 16); lzx_br_consume(br, 16); archive_le16enc(ds->rbytes+2, u16); ds->rbytes_avail = 4; } else if (lzx_br_has(br, 16)) { u16 = lzx_br_bits(br, 16); lzx_br_consume(br, 16); archive_le16enc(ds->rbytes, u16); ds->rbytes_avail = 2; } if (ds->rbytes_avail < 4 && ds->br.have_odd) { ds->rbytes[ds->rbytes_avail++] = ds->br.odd; ds->br.have_odd = 0; } while (ds->rbytes_avail < 4) { if (strm->avail_in <= 0) { if (last) goto failed; return (ARCHIVE_OK); } ds->rbytes[ds->rbytes_avail++] = *strm->next_in++; strm->avail_in--; } ds->rbytes_avail = 0; if (ds->state == ST_RD_R0) { ds->r0 = archive_le32dec(ds->rbytes); if (ds->r0 < 0) goto failed; ds->state = ST_RD_R1; } else if (ds->state == ST_RD_R1) { ds->r1 = archive_le32dec(ds->rbytes); if (ds->r1 < 0) goto failed; ds->state = ST_RD_R2; } else if (ds->state == ST_RD_R2) { ds->r2 = archive_le32dec(ds->rbytes); if (ds->r2 < 0) goto failed; /* We've gotten all repeated offsets. */ ds->state = ST_COPY_UNCOMP1; } } while (ds->state != ST_COPY_UNCOMP1); /* FALL THROUGH */ case ST_COPY_UNCOMP1: /* * Copy bytes form next_in to next_out directly. */ while (ds->block_bytes_avail) { int l; if (strm->avail_out <= 0) /* Output buffer is empty. */ return (ARCHIVE_OK); if (strm->avail_in <= 0) { /* Input buffer is empty. */ if (last) goto failed; return (ARCHIVE_OK); } l = (int)ds->block_bytes_avail; if (l > ds->w_size - ds->w_pos) l = ds->w_size - ds->w_pos; if (l > strm->avail_out) l = (int)strm->avail_out; if (l > strm->avail_in) l = (int)strm->avail_in; memcpy(strm->next_out, strm->next_in, l); memcpy(&(ds->w_buff[ds->w_pos]), strm->next_in, l); strm->next_in += l; strm->avail_in -= l; strm->next_out += l; strm->avail_out -= l; strm->total_out += l; ds->w_pos = (ds->w_pos + l) & ds->w_mask; ds->block_bytes_avail -= l; } /* FALL THROUGH */ case ST_COPY_UNCOMP2: /* Re-align; skip padding byte. */ if (ds->block_size & 1) { if (strm->avail_in <= 0) { /* Input buffer is empty. */ ds->state = ST_COPY_UNCOMP2; if (last) goto failed; return (ARCHIVE_OK); } strm->next_in++; strm->avail_in --; } /* This block ended. */ ds->state = ST_RD_BLOCK_TYPE; return (ARCHIVE_EOF); /********************/ case ST_RD_ALIGNED_OFFSET: /* * Read Aligned offset tree. */ if (!lzx_br_read_ahead(strm, br, 3 * ds->at.len_size)) { ds->state = ST_RD_ALIGNED_OFFSET; if (last) goto failed; return (ARCHIVE_OK); } memset(ds->at.freq, 0, sizeof(ds->at.freq)); for (i = 0; i < ds->at.len_size; i++) { ds->at.bitlen[i] = lzx_br_bits(br, 3); ds->at.freq[ds->at.bitlen[i]]++; lzx_br_consume(br, 3); } if (!lzx_make_huffman_table(&ds->at)) goto failed; /* FALL THROUGH */ case ST_RD_VERBATIM: ds->loop = 0; /* FALL THROUGH */ case ST_RD_PRE_MAIN_TREE_256: /* * Read Pre-tree for first 256 elements of main tree. */ if (!lzx_read_pre_tree(strm)) { ds->state = ST_RD_PRE_MAIN_TREE_256; if (last) goto failed; return (ARCHIVE_OK); } if (!lzx_make_huffman_table(&ds->pt)) goto failed; ds->loop = 0; /* FALL THROUGH */ case ST_MAIN_TREE_256: /* * Get path lengths of first 256 elements of main tree. */ r = lzx_read_bitlen(strm, &ds->mt, 256); if (r < 0) goto failed; else if (!r) { ds->state = ST_MAIN_TREE_256; if (last) goto failed; return (ARCHIVE_OK); } ds->loop = 0; /* FALL THROUGH */ case ST_RD_PRE_MAIN_TREE_REM: /* * Read Pre-tree for remaining elements of main tree. */ if (!lzx_read_pre_tree(strm)) { ds->state = ST_RD_PRE_MAIN_TREE_REM; if (last) goto failed; return (ARCHIVE_OK); } if (!lzx_make_huffman_table(&ds->pt)) goto failed; ds->loop = 256; /* FALL THROUGH */ case ST_MAIN_TREE_REM: /* * Get path lengths of remaining elements of main tree. */ r = lzx_read_bitlen(strm, &ds->mt, -1); if (r < 0) goto failed; else if (!r) { ds->state = ST_MAIN_TREE_REM; if (last) goto failed; return (ARCHIVE_OK); } if (!lzx_make_huffman_table(&ds->mt)) goto failed; ds->loop = 0; /* FALL THROUGH */ case ST_RD_PRE_LENGTH_TREE: /* * Read Pre-tree for remaining elements of main tree. */ if (!lzx_read_pre_tree(strm)) { ds->state = ST_RD_PRE_LENGTH_TREE; if (last) goto failed; return (ARCHIVE_OK); } if (!lzx_make_huffman_table(&ds->pt)) goto failed; ds->loop = 0; /* FALL THROUGH */ case ST_LENGTH_TREE: /* * Get path lengths of remaining elements of main tree. */ r = lzx_read_bitlen(strm, &ds->lt, -1); if (r < 0) goto failed; else if (!r) { ds->state = ST_LENGTH_TREE; if (last) goto failed; return (ARCHIVE_OK); } if (!lzx_make_huffman_table(&ds->lt)) goto failed; ds->state = ST_MAIN; return (100); } } failed: return (ds->error = ARCHIVE_FAILED); } static int lzx_decode_blocks(struct lzx_stream *strm, int last) { struct lzx_dec *ds = strm->ds; struct lzx_br bre = ds->br; struct huffman *at = &(ds->at), *lt = &(ds->lt), *mt = &(ds->mt); const struct lzx_pos_tbl *pos_tbl = ds->pos_tbl; unsigned char *noutp = strm->next_out; unsigned char *endp = noutp + strm->avail_out; unsigned char *w_buff = ds->w_buff; unsigned char *at_bitlen = at->bitlen; unsigned char *lt_bitlen = lt->bitlen; unsigned char *mt_bitlen = mt->bitlen; size_t block_bytes_avail = ds->block_bytes_avail; int at_max_bits = at->max_bits; int lt_max_bits = lt->max_bits; int mt_max_bits = mt->max_bits; int c, copy_len = ds->copy_len, copy_pos = ds->copy_pos; int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size; int length_header = ds->length_header; int offset_bits = ds->offset_bits; int position_slot = ds->position_slot; int r0 = ds->r0, r1 = ds->r1, r2 = ds->r2; int state = ds->state; char block_type = ds->block_type; for (;;) { switch (state) { case ST_MAIN: for (;;) { if (block_bytes_avail == 0) { /* This block ended. */ ds->state = ST_RD_BLOCK_TYPE; ds->br = bre; ds->block_bytes_avail = block_bytes_avail; ds->copy_len = copy_len; ds->copy_pos = copy_pos; ds->length_header = length_header; ds->position_slot = position_slot; ds->r0 = r0; ds->r1 = r1; ds->r2 = r2; ds->w_pos = w_pos; strm->avail_out = endp - noutp; return (ARCHIVE_EOF); } if (noutp >= endp) /* Output buffer is empty. */ goto next_data; if (!lzx_br_read_ahead(strm, &bre, mt_max_bits)) { if (!last) goto next_data; /* Remaining bits are less than * maximum bits(mt.max_bits) but maybe * it still remains as much as we need, * so we should try to use it with * dummy bits. */ c = lzx_decode_huffman(mt, lzx_br_bits_forced( &bre, mt_max_bits)); lzx_br_consume(&bre, mt_bitlen[c]); if (!lzx_br_has(&bre, 0)) goto failed;/* Over read. */ } else { c = lzx_decode_huffman(mt, lzx_br_bits(&bre, mt_max_bits)); lzx_br_consume(&bre, mt_bitlen[c]); } if (c > UCHAR_MAX) break; /* * 'c' is exactly literal code. */ /* Save a decoded code to reference it * afterward. */ w_buff[w_pos] = c; w_pos = (w_pos + 1) & w_mask; /* Store the decoded code to output buffer. */ *noutp++ = c; block_bytes_avail--; } /* * Get a match code, its length and offset. */ c -= UCHAR_MAX + 1; length_header = c & 7; position_slot = c >> 3; /* FALL THROUGH */ case ST_LENGTH: /* * Get a length. */ if (length_header == 7) { if (!lzx_br_read_ahead(strm, &bre, lt_max_bits)) { if (!last) { state = ST_LENGTH; goto next_data; } c = lzx_decode_huffman(lt, lzx_br_bits_forced( &bre, lt_max_bits)); lzx_br_consume(&bre, lt_bitlen[c]); if (!lzx_br_has(&bre, 0)) goto failed;/* Over read. */ } else { c = lzx_decode_huffman(lt, lzx_br_bits(&bre, lt_max_bits)); lzx_br_consume(&bre, lt_bitlen[c]); } copy_len = c + 7 + 2; } else copy_len = length_header + 2; if ((size_t)copy_len > block_bytes_avail) goto failed; /* * Get an offset. */ switch (position_slot) { case 0: /* Use repeated offset 0. */ copy_pos = r0; state = ST_REAL_POS; continue; case 1: /* Use repeated offset 1. */ copy_pos = r1; /* Swap repeated offset. */ r1 = r0; r0 = copy_pos; state = ST_REAL_POS; continue; case 2: /* Use repeated offset 2. */ copy_pos = r2; /* Swap repeated offset. */ r2 = r0; r0 = copy_pos; state = ST_REAL_POS; continue; default: offset_bits = pos_tbl[position_slot].footer_bits; break; } /* FALL THROUGH */ case ST_OFFSET: /* * Get the offset, which is a distance from * current window position. */ if (block_type == ALIGNED_OFFSET_BLOCK && offset_bits >= 3) { int offbits = offset_bits - 3; if (!lzx_br_read_ahead(strm, &bre, offbits)) { state = ST_OFFSET; if (last) goto failed; goto next_data; } copy_pos = lzx_br_bits(&bre, offbits) << 3; /* Get an aligned number. */ if (!lzx_br_read_ahead(strm, &bre, offbits + at_max_bits)) { if (!last) { state = ST_OFFSET; goto next_data; } lzx_br_consume(&bre, offbits); c = lzx_decode_huffman(at, lzx_br_bits_forced(&bre, at_max_bits)); lzx_br_consume(&bre, at_bitlen[c]); if (!lzx_br_has(&bre, 0)) goto failed;/* Over read. */ } else { lzx_br_consume(&bre, offbits); c = lzx_decode_huffman(at, lzx_br_bits(&bre, at_max_bits)); lzx_br_consume(&bre, at_bitlen[c]); } /* Add an aligned number. */ copy_pos += c; } else { if (!lzx_br_read_ahead(strm, &bre, offset_bits)) { state = ST_OFFSET; if (last) goto failed; goto next_data; } copy_pos = lzx_br_bits(&bre, offset_bits); lzx_br_consume(&bre, offset_bits); } copy_pos += pos_tbl[position_slot].base -2; /* Update repeated offset LRU queue. */ r2 = r1; r1 = r0; r0 = copy_pos; /* FALL THROUGH */ case ST_REAL_POS: /* * Compute a real position in window. */ copy_pos = (w_pos - copy_pos) & w_mask; /* FALL THROUGH */ case ST_COPY: /* * Copy several bytes as extracted data from the window * into the output buffer. */ for (;;) { const unsigned char *s; int l; l = copy_len; if (copy_pos > w_pos) { if (l > w_size - copy_pos) l = w_size - copy_pos; } else { if (l > w_size - w_pos) l = w_size - w_pos; } if (noutp + l >= endp) l = (int)(endp - noutp); s = w_buff + copy_pos; if (l >= 8 && ((copy_pos + l < w_pos) || (w_pos + l < copy_pos))) { memcpy(w_buff + w_pos, s, l); memcpy(noutp, s, l); } else { unsigned char *d; int li; d = w_buff + w_pos; for (li = 0; li < l; li++) noutp[li] = d[li] = s[li]; } noutp += l; copy_pos = (copy_pos + l) & w_mask; w_pos = (w_pos + l) & w_mask; block_bytes_avail -= l; if (copy_len <= l) /* A copy of current pattern ended. */ break; copy_len -= l; if (noutp >= endp) { /* Output buffer is empty. */ state = ST_COPY; goto next_data; } } state = ST_MAIN; break; } } failed: return (ds->error = ARCHIVE_FAILED); next_data: ds->br = bre; ds->block_bytes_avail = block_bytes_avail; ds->copy_len = copy_len; ds->copy_pos = copy_pos; ds->length_header = length_header; ds->offset_bits = offset_bits; ds->position_slot = position_slot; ds->r0 = r0; ds->r1 = r1; ds->r2 = r2; ds->state = state; ds->w_pos = w_pos; strm->avail_out = endp - noutp; return (ARCHIVE_OK); } static int lzx_read_pre_tree(struct lzx_stream *strm) { struct lzx_dec *ds = strm->ds; struct lzx_br *br = &(ds->br); int i; if (ds->loop == 0) memset(ds->pt.freq, 0, sizeof(ds->pt.freq)); for (i = ds->loop; i < ds->pt.len_size; i++) { if (!lzx_br_read_ahead(strm, br, 4)) { ds->loop = i; return (0); } ds->pt.bitlen[i] = lzx_br_bits(br, 4); ds->pt.freq[ds->pt.bitlen[i]]++; lzx_br_consume(br, 4); } ds->loop = i; return (1); } /* * Read a bunch of bit-lengths from pre-tree. */ static int lzx_read_bitlen(struct lzx_stream *strm, struct huffman *d, int end) { struct lzx_dec *ds = strm->ds; struct lzx_br *br = &(ds->br); int c, i, j, ret, same; unsigned rbits; i = ds->loop; if (i == 0) memset(d->freq, 0, sizeof(d->freq)); ret = 0; if (end < 0) end = d->len_size; while (i < end) { ds->loop = i; if (!lzx_br_read_ahead(strm, br, ds->pt.max_bits)) goto getdata; rbits = lzx_br_bits(br, ds->pt.max_bits); c = lzx_decode_huffman(&(ds->pt), rbits); switch (c) { case 17:/* several zero lengths, from 4 to 19. */ if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+4)) goto getdata; lzx_br_consume(br, ds->pt.bitlen[c]); same = lzx_br_bits(br, 4) + 4; if (i + same > end) return (-1);/* Invalid */ lzx_br_consume(br, 4); for (j = 0; j < same; j++) d->bitlen[i++] = 0; break; case 18:/* many zero lengths, from 20 to 51. */ if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+5)) goto getdata; lzx_br_consume(br, ds->pt.bitlen[c]); same = lzx_br_bits(br, 5) + 20; if (i + same > end) return (-1);/* Invalid */ lzx_br_consume(br, 5); memset(d->bitlen + i, 0, same); i += same; break; case 19:/* a few same lengths. */ if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+1+ds->pt.max_bits)) goto getdata; lzx_br_consume(br, ds->pt.bitlen[c]); same = lzx_br_bits(br, 1) + 4; if (i + same > end) return (-1); lzx_br_consume(br, 1); rbits = lzx_br_bits(br, ds->pt.max_bits); c = lzx_decode_huffman(&(ds->pt), rbits); lzx_br_consume(br, ds->pt.bitlen[c]); c = (d->bitlen[i] - c + 17) % 17; if (c < 0) return (-1);/* Invalid */ for (j = 0; j < same; j++) d->bitlen[i++] = c; d->freq[c] += same; break; default: lzx_br_consume(br, ds->pt.bitlen[c]); c = (d->bitlen[i] - c + 17) % 17; if (c < 0) return (-1);/* Invalid */ d->freq[c]++; d->bitlen[i++] = c; break; } } ret = 1; getdata: ds->loop = i; return (ret); } static int lzx_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits) { int bits; if (hf->bitlen == NULL || hf->len_size != (int)len_size) { free(hf->bitlen); hf->bitlen = calloc(len_size, sizeof(hf->bitlen[0])); if (hf->bitlen == NULL) return (ARCHIVE_FATAL); hf->len_size = (int)len_size; } else memset(hf->bitlen, 0, len_size * sizeof(hf->bitlen[0])); if (hf->tbl == NULL) { if (tbl_bits < HTBL_BITS) bits = tbl_bits; else bits = HTBL_BITS; hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0])); if (hf->tbl == NULL) return (ARCHIVE_FATAL); hf->tbl_bits = tbl_bits; } if (hf->tree == NULL && tbl_bits > HTBL_BITS) { hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4); hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0])); if (hf->tree == NULL) return (ARCHIVE_FATAL); } return (ARCHIVE_OK); } static void lzx_huffman_free(struct huffman *hf) { free(hf->bitlen); free(hf->tbl); free(hf->tree); } /* * Make a huffman coding table. */ static int lzx_make_huffman_table(struct huffman *hf) { uint16_t *tbl; const unsigned char *bitlen; int bitptn[17], weight[17]; int i, maxbits = 0, ptn, tbl_size, w; int diffbits, len_avail; /* * Initialize bit patterns. */ ptn = 0; for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) { bitptn[i] = ptn; weight[i] = w; if (hf->freq[i]) { ptn += hf->freq[i] * w; maxbits = i; } } if ((ptn & 0xffff) != 0 || maxbits > hf->tbl_bits) return (0);/* Invalid */ hf->max_bits = maxbits; /* * Cut out extra bits which we won't house in the table. * This preparation reduces the same calculation in the for-loop * making the table. */ if (maxbits < 16) { int ebits = 16 - maxbits; for (i = 1; i <= maxbits; i++) { bitptn[i] >>= ebits; weight[i] >>= ebits; } } if (maxbits > HTBL_BITS) { int htbl_max; uint16_t *p; diffbits = maxbits - HTBL_BITS; for (i = 1; i <= HTBL_BITS; i++) { bitptn[i] >>= diffbits; weight[i] >>= diffbits; } htbl_max = bitptn[HTBL_BITS] + weight[HTBL_BITS] * hf->freq[HTBL_BITS]; p = &(hf->tbl[htbl_max]); while (p < &hf->tbl[1U<shift_bits = diffbits; /* * Make the table. */ tbl_size = 1 << HTBL_BITS; tbl = hf->tbl; bitlen = hf->bitlen; len_avail = hf->len_size; hf->tree_used = 0; for (i = 0; i < len_avail; i++) { uint16_t *p; int len, cnt; uint16_t bit; int extlen; struct htree_t *ht; if (bitlen[i] == 0) continue; /* Get a bit pattern */ len = bitlen[i]; ptn = bitptn[len]; cnt = weight[len]; if (len <= HTBL_BITS) { /* Calculate next bit pattern */ if ((bitptn[len] = ptn + cnt) > tbl_size) return (0);/* Invalid */ /* Update the table */ p = &(tbl[ptn]); while (--cnt >= 0) p[cnt] = (uint16_t)i; continue; } /* * A bit length is too big to be housed to a direct table, * so we use a tree model for its extra bits. */ bitptn[len] = ptn + cnt; bit = 1U << (diffbits -1); extlen = len - HTBL_BITS; p = &(tbl[ptn >> diffbits]); if (*p == 0) { *p = len_avail + hf->tree_used; ht = &(hf->tree[hf->tree_used++]); if (hf->tree_used > hf->tree_avail) return (0);/* Invalid */ ht->left = 0; ht->right = 0; } else { if (*p < len_avail || *p >= (len_avail + hf->tree_used)) return (0);/* Invalid */ ht = &(hf->tree[*p - len_avail]); } while (--extlen > 0) { if (ptn & bit) { if (ht->left < len_avail) { ht->left = len_avail + hf->tree_used; ht = &(hf->tree[hf->tree_used++]); if (hf->tree_used > hf->tree_avail) return (0);/* Invalid */ ht->left = 0; ht->right = 0; } else { ht = &(hf->tree[ht->left - len_avail]); } } else { if (ht->right < len_avail) { ht->right = len_avail + hf->tree_used; ht = &(hf->tree[hf->tree_used++]); if (hf->tree_used > hf->tree_avail) return (0);/* Invalid */ ht->left = 0; ht->right = 0; } else { ht = &(hf->tree[ht->right - len_avail]); } } bit >>= 1; } if (ptn & bit) { if (ht->left != 0) return (0);/* Invalid */ ht->left = (uint16_t)i; } else { if (ht->right != 0) return (0);/* Invalid */ ht->right = (uint16_t)i; } } return (1); } static int lzx_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c) { struct htree_t *ht; int extlen; ht = hf->tree; extlen = hf->shift_bits; while (c >= hf->len_size) { c -= hf->len_size; if (extlen-- <= 0 || c >= hf->tree_used) return (0); if (rbits & (1U << extlen)) c = ht[c].left; else c = ht[c].right; } return (c); } static inline int lzx_decode_huffman(struct huffman *hf, unsigned rbits) { int c; /* * At first search an index table for a bit pattern. * If it fails, search a huffman tree for. */ c = hf->tbl[rbits >> hf->shift_bits]; if (c < hf->len_size) return (c); /* This bit pattern needs to be found out at a huffman tree. */ return (lzx_decode_huffman_tree(hf, rbits, c)); } diff --git a/libarchive/archive_read_support_format_tar.c b/libarchive/archive_read_support_format_tar.c index b977cb7400fb..071d766b74b7 100644 --- a/libarchive/archive_read_support_format_tar.c +++ b/libarchive/archive_read_support_format_tar.c @@ -1,2853 +1,2834 @@ /*- * Copyright (c) 2003-2007 Tim Kientzle * Copyright (c) 2011-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_tar.c 201161 2009-12-29 05:44:39Z kientzle $"); #ifdef HAVE_ERRNO_H #include #endif #include #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STRING_H #include #endif #include "archive.h" #include "archive_acl_private.h" /* For ACL parsing routines. */ #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_private.h" #include "archive_read_private.h" #define tar_min(a,b) ((a) < (b) ? (a) : (b)) /* * Layout of POSIX 'ustar' tar header. */ struct archive_entry_header_ustar { char name[100]; char mode[8]; char uid[8]; char gid[8]; char size[12]; char mtime[12]; char checksum[8]; char typeflag[1]; char linkname[100]; /* "old format" header ends here */ char magic[6]; /* For POSIX: "ustar\0" */ char version[2]; /* For POSIX: "00" */ char uname[32]; char gname[32]; char rdevmajor[8]; char rdevminor[8]; char prefix[155]; }; /* * Structure of GNU tar header */ struct gnu_sparse { char offset[12]; char numbytes[12]; }; struct archive_entry_header_gnutar { char name[100]; char mode[8]; char uid[8]; char gid[8]; char size[12]; char mtime[12]; char checksum[8]; char typeflag[1]; char linkname[100]; char magic[8]; /* "ustar \0" (note blank/blank/null at end) */ char uname[32]; char gname[32]; char rdevmajor[8]; char rdevminor[8]; char atime[12]; char ctime[12]; char offset[12]; char longnames[4]; char unused[1]; struct gnu_sparse sparse[4]; char isextended[1]; char realsize[12]; /* * Old GNU format doesn't use POSIX 'prefix' field; they use * the 'L' (longname) entry instead. */ }; /* * Data specific to this format. */ struct sparse_block { struct sparse_block *next; int64_t offset; int64_t remaining; int hole; }; struct tar { struct archive_string acl_text; struct archive_string entry_pathname; /* For "GNU.sparse.name" and other similar path extensions. */ struct archive_string entry_pathname_override; struct archive_string entry_linkpath; struct archive_string entry_uname; struct archive_string entry_gname; struct archive_string longlink; struct archive_string longname; struct archive_string pax_header; struct archive_string pax_global; struct archive_string line; int pax_hdrcharset_binary; int header_recursion_depth; int64_t entry_bytes_remaining; int64_t entry_offset; int64_t entry_padding; int64_t entry_bytes_unconsumed; int64_t realsize; int sparse_allowed; struct sparse_block *sparse_list; struct sparse_block *sparse_last; int64_t sparse_offset; int64_t sparse_numbytes; int sparse_gnu_major; int sparse_gnu_minor; char sparse_gnu_pending; struct archive_string localname; struct archive_string_conv *opt_sconv; struct archive_string_conv *sconv; struct archive_string_conv *sconv_acl; struct archive_string_conv *sconv_default; int init_default_conversion; int compat_2x; int process_mac_extensions; int read_concatenated_archives; }; static int archive_block_is_null(const char *p); static char *base64_decode(const char *, size_t, size_t *); static int gnu_add_sparse_entry(struct archive_read *, struct tar *, int64_t offset, int64_t remaining); static void gnu_clear_sparse_list(struct tar *); static int gnu_sparse_old_read(struct archive_read *, struct tar *, const struct archive_entry_header_gnutar *header, size_t *); static int gnu_sparse_old_parse(struct archive_read *, struct tar *, const struct gnu_sparse *sparse, int length); static int gnu_sparse_01_parse(struct archive_read *, struct tar *, const char *); static ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *, size_t *); static int header_Solaris_ACL(struct archive_read *, struct tar *, struct archive_entry *, const void *, size_t *); static int header_common(struct archive_read *, struct tar *, struct archive_entry *, const void *); static int header_old_tar(struct archive_read *, struct tar *, struct archive_entry *, const void *); static int header_pax_extensions(struct archive_read *, struct tar *, struct archive_entry *, const void *, size_t *); static int header_pax_global(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_longlink(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_longname(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int read_mac_metadata_blob(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_volume(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_ustar(struct archive_read *, struct tar *, struct archive_entry *, const void *h); static int header_gnutar(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int archive_read_format_tar_bid(struct archive_read *, int); static int archive_read_format_tar_options(struct archive_read *, const char *, const char *); static int archive_read_format_tar_cleanup(struct archive_read *); static int archive_read_format_tar_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset); static int archive_read_format_tar_skip(struct archive_read *a); static int archive_read_format_tar_read_header(struct archive_read *, struct archive_entry *); static int checksum(struct archive_read *, const void *); static int pax_attribute(struct archive_read *, struct tar *, struct archive_entry *, const char *key, const char *value); static int pax_header(struct archive_read *, struct tar *, struct archive_entry *, char *attr); static void pax_time(const char *, int64_t *sec, long *nanos); static ssize_t readline(struct archive_read *, struct tar *, const char **, ssize_t limit, size_t *); static int read_body_to_string(struct archive_read *, struct tar *, struct archive_string *, const void *h, size_t *); static int solaris_sparse_parse(struct archive_read *, struct tar *, struct archive_entry *, const char *); static int64_t tar_atol(const char *, size_t); static int64_t tar_atol10(const char *, size_t); static int64_t tar_atol256(const char *, size_t); static int64_t tar_atol8(const char *, size_t); static int tar_read_header(struct archive_read *, struct tar *, struct archive_entry *, size_t *); static int tohex(int c); static char *url_decode(const char *); static void tar_flush_unconsumed(struct archive_read *, size_t *); int archive_read_support_format_gnutar(struct archive *a) { archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar"); return (archive_read_support_format_tar(a)); } int archive_read_support_format_tar(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct tar *tar; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_tar"); tar = (struct tar *)calloc(1, sizeof(*tar)); #ifdef HAVE_COPYFILE_H /* Set this by default on Mac OS. */ tar->process_mac_extensions = 1; #endif if (tar == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate tar data"); return (ARCHIVE_FATAL); } r = __archive_read_register_format(a, tar, "tar", archive_read_format_tar_bid, archive_read_format_tar_options, archive_read_format_tar_read_header, archive_read_format_tar_read_data, archive_read_format_tar_skip, NULL, archive_read_format_tar_cleanup, NULL, NULL); if (r != ARCHIVE_OK) free(tar); return (ARCHIVE_OK); } static int archive_read_format_tar_cleanup(struct archive_read *a) { struct tar *tar; tar = (struct tar *)(a->format->data); gnu_clear_sparse_list(tar); archive_string_free(&tar->acl_text); archive_string_free(&tar->entry_pathname); archive_string_free(&tar->entry_pathname_override); archive_string_free(&tar->entry_linkpath); archive_string_free(&tar->entry_uname); archive_string_free(&tar->entry_gname); archive_string_free(&tar->line); archive_string_free(&tar->pax_global); archive_string_free(&tar->pax_header); archive_string_free(&tar->longname); archive_string_free(&tar->longlink); archive_string_free(&tar->localname); free(tar); (a->format->data) = NULL; return (ARCHIVE_OK); } /* * Validate number field * - * Flags: - * 1 - allow double \0 at field end + * This has to be pretty lenient in order to accomodate the enormous + * variety of tar writers in the world: + * = POSIX ustar requires octal values with leading zeros and + * specific termination on fields + * = Many writers use different termination (in particular, libarchive + * omits terminator bytes to squeeze one or two more digits) + * = Many writers pad with space and omit leading zeros + * = GNU tar and star write base-256 values if numbers are too + * big to be represented in octal + * + * This should tolerate all variants in use. It will reject a field + * where the writer just left garbage after a trailing NUL. */ static int -validate_number_field(const char* p_field, size_t i_size, int flags) +validate_number_field(const char* p_field, size_t i_size) { unsigned char marker = (unsigned char)p_field[0]; - /* octal? */ - if ((marker >= '0' && marker <= '7') || marker == ' ') { + if (marker == 128 || marker == 255 || marker == 0) { + /* Base-256 marker, there's nothing we can check. */ + return 1; + } else { + /* Must be octal */ size_t i = 0; - int octal_found = 0; - for (i = 0; i < i_size; ++i) { - switch (p_field[i]) - { - case ' ': - /* skip any leading spaces and trailing space */ - if (octal_found == 0 || i == i_size - 1) { - continue; - } - break; - case '\0': - /* - * null should be allowed only at the end - * - * Perl Archive::Tar terminates some fields - * with two nulls. We must allow this to stay - * compatible. - */ - if (i != i_size - 1) { - if (((flags & 1) == 0) - || i != i_size - 2) - return 0; - } - break; - /* rest must be octal digits */ - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - ++octal_found; - break; + /* Skip any leading spaces */ + while (i < i_size && p_field[i] == ' ') { + ++i; + } + /* Must be at least one octal digit. */ + if (i >= i_size || p_field[i] < '0' || p_field[i] > '7') { + return 0; + } + /* Skip remaining octal digits. */ + while (i < i_size && p_field[i] >= '0' && p_field[i] <= '7') { + ++i; + } + /* Any remaining characters must be space or NUL padding. */ + while (i < i_size) { + if (p_field[i] != ' ' && p_field[i] != 0) { + return 0; } + ++i; } - return octal_found > 0; - } - /* base 256 (i.e. binary number) */ - else if (marker == 128 || marker == 255 || marker == 0) { - /* nothing to check */ return 1; } - /* not a number field */ - else { - return 0; - } } static int archive_read_format_tar_bid(struct archive_read *a, int best_bid) { int bid; const char *h; const struct archive_entry_header_ustar *header; (void)best_bid; /* UNUSED */ bid = 0; /* Now let's look at the actual header and see if it matches. */ h = __archive_read_ahead(a, 512, NULL); if (h == NULL) return (-1); /* If it's an end-of-archive mark, we can handle it. */ if (h[0] == 0 && archive_block_is_null(h)) { /* * Usually, I bid the number of bits verified, but * in this case, 4096 seems excessive so I picked 10 as * an arbitrary but reasonable-seeming value. */ return (10); } /* If it's not an end-of-archive mark, it must have a valid checksum.*/ if (!checksum(a, h)) return (0); bid += 48; /* Checksum is usually 6 octal digits. */ header = (const struct archive_entry_header_ustar *)h; /* Recognize POSIX formats. */ if ((memcmp(header->magic, "ustar\0", 6) == 0) && (memcmp(header->version, "00", 2) == 0)) bid += 56; /* Recognize GNU tar format. */ if ((memcmp(header->magic, "ustar ", 6) == 0) && (memcmp(header->version, " \0", 2) == 0)) bid += 56; /* Type flag must be null, digit or A-Z, a-z. */ if (header->typeflag[0] != 0 && !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) return (0); bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ /* * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields. - * These are usually octal numbers but GNU tar encodes "big" values as - * base256 and leading zeroes are sometimes replaced by spaces. - * Even the null terminator is sometimes omitted. Anyway, must be - * checked to avoid false positives. - * - * Perl Archive::Tar does not follow the spec and terminates mode, uid, - * gid, rdevmajor and rdevminor with a double \0. For compatibility - * reasons we allow this deviation. */ if (bid > 0 && ( - validate_number_field(header->mode, sizeof(header->mode), 1) == 0 - || validate_number_field(header->uid, sizeof(header->uid), 1) == 0 - || validate_number_field(header->gid, sizeof(header->gid), 1) == 0 - || validate_number_field(header->mtime, sizeof(header->mtime), - 0) == 0 - || validate_number_field(header->size, sizeof(header->size), 0) == 0 - || validate_number_field(header->rdevmajor, - sizeof(header->rdevmajor), 1) == 0 - || validate_number_field(header->rdevminor, - sizeof(header->rdevminor), 1) == 0)) { + validate_number_field(header->mode, sizeof(header->mode)) == 0 + || validate_number_field(header->uid, sizeof(header->uid)) == 0 + || validate_number_field(header->gid, sizeof(header->gid)) == 0 + || validate_number_field(header->mtime, sizeof(header->mtime)) == 0 + || validate_number_field(header->size, sizeof(header->size)) == 0 + || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0 + || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0)) { bid = 0; } return (bid); } static int archive_read_format_tar_options(struct archive_read *a, const char *key, const char *val) { struct tar *tar; int ret = ARCHIVE_FAILED; tar = (struct tar *)(a->format->data); if (strcmp(key, "compat-2x") == 0) { /* Handle UTF-8 filnames as libarchive 2.x */ tar->compat_2x = (val != NULL && val[0] != 0); tar->init_default_conversion = tar->compat_2x; return (ARCHIVE_OK); } else if (strcmp(key, "hdrcharset") == 0) { if (val == NULL || val[0] == 0) archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "tar: hdrcharset option needs a character-set name"); else { tar->opt_sconv = archive_string_conversion_from_charset( &a->archive, val, 0); if (tar->opt_sconv != NULL) ret = ARCHIVE_OK; else ret = ARCHIVE_FATAL; } return (ret); } else if (strcmp(key, "mac-ext") == 0) { tar->process_mac_extensions = (val != NULL && val[0] != 0); return (ARCHIVE_OK); } else if (strcmp(key, "read_concatenated_archives") == 0) { tar->read_concatenated_archives = (val != NULL && val[0] != 0); return (ARCHIVE_OK); } /* Note: The "warn" return is just to inform the options * supervisor that we didn't handle it. It will generate * a suitable error if no one used this option. */ return (ARCHIVE_WARN); } /* utility function- this exists to centralize the logic of tracking * how much unconsumed data we have floating around, and to consume * anything outstanding since we're going to do read_aheads */ static void tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed) { if (*unconsumed) { /* void *data = (void *)__archive_read_ahead(a, *unconsumed, NULL); * this block of code is to poison claimed unconsumed space, ensuring * things break if it is in use still. * currently it WILL break things, so enable it only for debugging this issue if (data) { memset(data, 0xff, *unconsumed); } */ __archive_read_consume(a, *unconsumed); *unconsumed = 0; } } /* * The function invoked by archive_read_next_header(). This * just sets up a few things and then calls the internal * tar_read_header() function below. */ static int archive_read_format_tar_read_header(struct archive_read *a, struct archive_entry *entry) { /* * When converting tar archives to cpio archives, it is * essential that each distinct file have a distinct inode * number. To simplify this, we keep a static count here to * assign fake dev/inode numbers to each tar entry. Note that * pax format archives may overwrite this with something more * useful. * * Ideally, we would track every file read from the archive so * that we could assign the same dev/ino pair to hardlinks, * but the memory required to store a complete lookup table is * probably not worthwhile just to support the relatively * obscure tar->cpio conversion case. */ static int default_inode; static int default_dev; struct tar *tar; const char *p; const wchar_t *wp; int r; size_t l, unconsumed = 0; /* Assign default device/inode values. */ archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */ archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ /* Limit generated st_ino number to 16 bits. */ if (default_inode >= 0xffff) { ++default_dev; default_inode = 0; } tar = (struct tar *)(a->format->data); tar->entry_offset = 0; gnu_clear_sparse_list(tar); tar->realsize = -1; /* Mark this as "unset" */ /* Setup default string conversion. */ tar->sconv = tar->opt_sconv; if (tar->sconv == NULL) { if (!tar->init_default_conversion) { tar->sconv_default = archive_string_default_conversion_for_read(&(a->archive)); tar->init_default_conversion = 1; } tar->sconv = tar->sconv_default; } r = tar_read_header(a, tar, entry, &unconsumed); tar_flush_unconsumed(a, &unconsumed); /* * "non-sparse" files are really just sparse files with * a single block. */ if (tar->sparse_list == NULL) { if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining) != ARCHIVE_OK) return (ARCHIVE_FATAL); } else { struct sparse_block *sb; for (sb = tar->sparse_list; sb != NULL; sb = sb->next) { if (!sb->hole) archive_entry_sparse_add_entry(entry, sb->offset, sb->remaining); } } if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) { /* * "Regular" entry with trailing '/' is really * directory: This is needed for certain old tar * variants and even for some broken newer ones. */ if ((wp = archive_entry_pathname_w(entry)) != NULL) { l = wcslen(wp); if (l > 0 && wp[l - 1] == L'/') { archive_entry_set_filetype(entry, AE_IFDIR); } } else if ((p = archive_entry_pathname(entry)) != NULL) { l = strlen(p); if (l > 0 && p[l - 1] == '/') { archive_entry_set_filetype(entry, AE_IFDIR); } } } return (r); } static int archive_read_format_tar_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { ssize_t bytes_read; struct tar *tar; struct sparse_block *p; tar = (struct tar *)(a->format->data); for (;;) { /* Remove exhausted entries from sparse list. */ while (tar->sparse_list != NULL && tar->sparse_list->remaining == 0) { p = tar->sparse_list; tar->sparse_list = p->next; free(p); } if (tar->entry_bytes_unconsumed) { __archive_read_consume(a, tar->entry_bytes_unconsumed); tar->entry_bytes_unconsumed = 0; } /* If we're at end of file, return EOF. */ if (tar->sparse_list == NULL || tar->entry_bytes_remaining == 0) { if (__archive_read_consume(a, tar->entry_padding) < 0) return (ARCHIVE_FATAL); tar->entry_padding = 0; *buff = NULL; *size = 0; *offset = tar->realsize; return (ARCHIVE_EOF); } *buff = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read < 0) return (ARCHIVE_FATAL); if (*buff == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Truncated tar archive"); return (ARCHIVE_FATAL); } if (bytes_read > tar->entry_bytes_remaining) bytes_read = (ssize_t)tar->entry_bytes_remaining; /* Don't read more than is available in the * current sparse block. */ if (tar->sparse_list->remaining < bytes_read) bytes_read = (ssize_t)tar->sparse_list->remaining; *size = bytes_read; *offset = tar->sparse_list->offset; tar->sparse_list->remaining -= bytes_read; tar->sparse_list->offset += bytes_read; tar->entry_bytes_remaining -= bytes_read; tar->entry_bytes_unconsumed = bytes_read; if (!tar->sparse_list->hole) return (ARCHIVE_OK); /* Current is hole data and skip this. */ } } static int archive_read_format_tar_skip(struct archive_read *a) { int64_t bytes_skipped; int64_t request; struct sparse_block *p; struct tar* tar; tar = (struct tar *)(a->format->data); /* Do not consume the hole of a sparse file. */ request = 0; for (p = tar->sparse_list; p != NULL; p = p->next) { if (!p->hole) { if (p->remaining >= INT64_MAX - request) { return ARCHIVE_FATAL; } request += p->remaining; } } if (request > tar->entry_bytes_remaining) request = tar->entry_bytes_remaining; request += tar->entry_padding + tar->entry_bytes_unconsumed; bytes_skipped = __archive_read_consume(a, request); if (bytes_skipped < 0) return (ARCHIVE_FATAL); tar->entry_bytes_remaining = 0; tar->entry_bytes_unconsumed = 0; tar->entry_padding = 0; /* Free the sparse list. */ gnu_clear_sparse_list(tar); return (ARCHIVE_OK); } /* * This function recursively interprets all of the headers associated * with a single entry. */ static int tar_read_header(struct archive_read *a, struct tar *tar, struct archive_entry *entry, size_t *unconsumed) { ssize_t bytes; int err; const char *h; const struct archive_entry_header_ustar *header; const struct archive_entry_header_gnutar *gnuheader; /* Loop until we find a workable header record. */ for (;;) { tar_flush_unconsumed(a, unconsumed); /* Read 512-byte header record */ h = __archive_read_ahead(a, 512, &bytes); if (bytes < 0) return ((int)bytes); if (bytes == 0) { /* EOF at a block boundary. */ /* Some writers do omit the block of nulls. */ return (ARCHIVE_EOF); } if (bytes < 512) { /* Short block at EOF; this is bad. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated tar archive"); return (ARCHIVE_FATAL); } *unconsumed = 512; /* Header is workable if it's not an end-of-archive mark. */ if (h[0] != 0 || !archive_block_is_null(h)) break; /* Ensure format is set for archives with only null blocks. */ if (a->archive.archive_format_name == NULL) { a->archive.archive_format = ARCHIVE_FORMAT_TAR; a->archive.archive_format_name = "tar"; } if (!tar->read_concatenated_archives) { /* Try to consume a second all-null record, as well. */ tar_flush_unconsumed(a, unconsumed); h = __archive_read_ahead(a, 512, NULL); if (h != NULL && h[0] == 0 && archive_block_is_null(h)) __archive_read_consume(a, 512); archive_clear_error(&a->archive); return (ARCHIVE_EOF); } /* * We're reading concatenated archives, ignore this block and * loop to get the next. */ } /* * Note: If the checksum fails and we return ARCHIVE_RETRY, * then the client is likely to just retry. This is a very * crude way to search for the next valid header! * * TODO: Improve this by implementing a real header scan. */ if (!checksum(a, h)) { tar_flush_unconsumed(a, unconsumed); archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); return (ARCHIVE_RETRY); /* Retryable: Invalid header */ } if (++tar->header_recursion_depth > 32) { tar_flush_unconsumed(a, unconsumed); archive_set_error(&a->archive, EINVAL, "Too many special headers"); return (ARCHIVE_WARN); } /* Determine the format variant. */ header = (const struct archive_entry_header_ustar *)h; switch(header->typeflag[0]) { case 'A': /* Solaris tar ACL */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "Solaris tar"; err = header_Solaris_ACL(a, tar, entry, h, unconsumed); break; case 'g': /* POSIX-standard 'g' header. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format"; err = header_pax_global(a, tar, entry, h, unconsumed); if (err == ARCHIVE_EOF) return (err); break; case 'K': /* Long link name (GNU tar, others) */ err = header_longlink(a, tar, entry, h, unconsumed); break; case 'L': /* Long filename (GNU tar, others) */ err = header_longname(a, tar, entry, h, unconsumed); break; case 'V': /* GNU volume header */ err = header_volume(a, tar, entry, h, unconsumed); break; case 'X': /* Used by SUN tar; same as 'x'. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format (Sun variant)"; err = header_pax_extensions(a, tar, entry, h, unconsumed); break; case 'x': /* POSIX-standard 'x' header. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format"; err = header_pax_extensions(a, tar, entry, h, unconsumed); break; default: gnuheader = (const struct archive_entry_header_gnutar *)h; if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; a->archive.archive_format_name = "GNU tar format"; err = header_gnutar(a, tar, entry, h, unconsumed); } else if (memcmp(header->magic, "ustar", 5) == 0) { if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; a->archive.archive_format_name = "POSIX ustar format"; } err = header_ustar(a, tar, entry, h); } else { a->archive.archive_format = ARCHIVE_FORMAT_TAR; a->archive.archive_format_name = "tar (non-POSIX)"; err = header_old_tar(a, tar, entry, h); } } if (err == ARCHIVE_FATAL) return (err); tar_flush_unconsumed(a, unconsumed); h = NULL; header = NULL; --tar->header_recursion_depth; /* Yuck. Apple's design here ends up storing long pathname * extensions for both the AppleDouble extension entry and the * regular entry. */ if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) && tar->header_recursion_depth == 0 && tar->process_mac_extensions) { int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed); if (err2 < err) err = err2; } /* We return warnings or success as-is. Anything else is fatal. */ if (err == ARCHIVE_WARN || err == ARCHIVE_OK) { if (tar->sparse_gnu_pending) { if (tar->sparse_gnu_major == 1 && tar->sparse_gnu_minor == 0) { ssize_t bytes_read; tar->sparse_gnu_pending = 0; /* Read initial sparse map. */ bytes_read = gnu_sparse_10_read(a, tar, unconsumed); tar->entry_bytes_remaining -= bytes_read; if (bytes_read < 0) return ((int)bytes_read); } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Unrecognized GNU sparse file format"); return (ARCHIVE_WARN); } tar->sparse_gnu_pending = 0; } return (err); } if (err == ARCHIVE_EOF) /* EOF when recursively reading a header is bad. */ archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); return (ARCHIVE_FATAL); } /* * Return true if block checksum is correct. */ static int checksum(struct archive_read *a, const void *h) { const unsigned char *bytes; const struct archive_entry_header_ustar *header; int check, sum; size_t i; (void)a; /* UNUSED */ bytes = (const unsigned char *)h; header = (const struct archive_entry_header_ustar *)h; /* Checksum field must hold an octal number */ for (i = 0; i < sizeof(header->checksum); ++i) { char c = header->checksum[i]; if (c != ' ' && c != '\0' && (c < '0' || c > '7')) return 0; } /* * Test the checksum. Note that POSIX specifies _unsigned_ * bytes for this calculation. */ sum = (int)tar_atol(header->checksum, sizeof(header->checksum)); check = 0; for (i = 0; i < 148; i++) check += (unsigned char)bytes[i]; for (; i < 156; i++) check += 32; for (; i < 512; i++) check += (unsigned char)bytes[i]; if (sum == check) return (1); /* * Repeat test with _signed_ bytes, just in case this archive * was created by an old BSD, Solaris, or HP-UX tar with a * broken checksum calculation. */ check = 0; for (i = 0; i < 148; i++) check += (signed char)bytes[i]; for (; i < 156; i++) check += 32; for (; i < 512; i++) check += (signed char)bytes[i]; if (sum == check) return (1); return (0); } /* * Return true if this block contains only nulls. */ static int archive_block_is_null(const char *p) { unsigned i; for (i = 0; i < 512; i++) if (*p++) return (0); return (1); } /* * Interpret 'A' Solaris ACL header */ static int header_Solaris_ACL(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { const struct archive_entry_header_ustar *header; size_t size; int err; int64_t type; char *acl, *p; /* * read_body_to_string adds a NUL terminator, but we need a little * more to make sure that we don't overrun acl_text later. */ header = (const struct archive_entry_header_ustar *)h; size = (size_t)tar_atol(header->size, sizeof(header->size)); err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Recursively read next header */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* TODO: Examine the first characters to see if this * is an AIX ACL descriptor. We'll likely never support * them, but it would be polite to recognize and warn when * we do see them. */ /* Leading octal number indicates ACL type and number of entries. */ p = acl = tar->acl_text.s; type = 0; while (*p != '\0' && p < acl + size) { if (*p < '0' || *p > '7') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (invalid digit)"); return(ARCHIVE_WARN); } type <<= 3; type += *p - '0'; if (type > 077777777) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (count too large)"); return (ARCHIVE_WARN); } p++; } switch ((int)type & ~0777777) { case 01000000: /* POSIX.1e ACL */ break; case 03000000: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Solaris NFSv4 ACLs not supported"); return (ARCHIVE_WARN); default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (unsupported type %o)", (int)type); return (ARCHIVE_WARN); } p++; if (p >= acl + size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (body overflow)"); return(ARCHIVE_WARN); } /* ACL text is null-terminated; find the end. */ size -= (p - acl); acl = p; while (*p != '\0' && p < acl + size) p++; if (tar->sconv_acl == NULL) { tar->sconv_acl = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (tar->sconv_acl == NULL) return (ARCHIVE_FATAL); } archive_strncpy(&(tar->localname), acl, p - acl); err = archive_acl_parse_l(archive_entry_acl(entry), tar->localname.s, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl); if (err != ARCHIVE_OK) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for ACL"); } else archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (unparsable)"); } return (err); } /* * Interpret 'K' long linkname header. */ static int header_longlink(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed); if (err != ARCHIVE_OK) return (err); err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* Set symlink if symlink already set, else hardlink. */ archive_entry_copy_link(entry, tar->longlink.s); return (ARCHIVE_OK); } static int set_conversion_failed_error(struct archive_read *a, struct archive_string_conv *sconv, const char *name) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for %s", name); return (ARCHIVE_FATAL); } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "%s can't be converted from %s to current locale.", name, archive_string_conversion_charset_name(sconv)); return (ARCHIVE_WARN); } /* * Interpret 'L' long filename header. */ static int header_longname(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Read and parse "real" header, then override name. */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); if (archive_entry_copy_pathname_l(entry, tar->longname.s, archive_strlen(&(tar->longname)), tar->sconv) != 0) err = set_conversion_failed_error(a, tar->sconv, "Pathname"); return (err); } /* * Interpret 'V' GNU tar volume header. */ static int header_volume(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { (void)h; /* Just skip this and read the next header. */ return (tar_read_header(a, tar, entry, unconsumed)); } /* * Read body of an archive entry into an archive_string object. */ static int read_body_to_string(struct archive_read *a, struct tar *tar, struct archive_string *as, const void *h, size_t *unconsumed) { int64_t size; const struct archive_entry_header_ustar *header; const void *src; (void)tar; /* UNUSED */ header = (const struct archive_entry_header_ustar *)h; size = tar_atol(header->size, sizeof(header->size)); if ((size > 1048576) || (size < 0)) { archive_set_error(&a->archive, EINVAL, "Special header too large"); return (ARCHIVE_FATAL); } /* Fail if we can't make our buffer big enough. */ if (archive_string_ensure(as, (size_t)size+1) == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory"); return (ARCHIVE_FATAL); } tar_flush_unconsumed(a, unconsumed); /* Read the body into the string. */ *unconsumed = (size_t)((size + 511) & ~ 511); src = __archive_read_ahead(a, *unconsumed, NULL); if (src == NULL) { *unconsumed = 0; return (ARCHIVE_FATAL); } memcpy(as->s, src, (size_t)size); as->s[size] = '\0'; as->length = (size_t)size; return (ARCHIVE_OK); } /* * Parse out common header elements. * * This would be the same as header_old_tar, except that the * filename is handled slightly differently for old and POSIX * entries (POSIX entries support a 'prefix'). This factoring * allows header_old_tar and header_ustar * to handle filenames differently, while still putting most of the * common parsing into one place. */ static int header_common(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; char tartype; int err = ARCHIVE_OK; header = (const struct archive_entry_header_ustar *)h; if (header->linkname[0]) archive_strncpy(&(tar->entry_linkpath), header->linkname, sizeof(header->linkname)); else archive_string_empty(&(tar->entry_linkpath)); /* Parse out the numeric fields (all are octal) */ archive_entry_set_mode(entry, (mode_t)tar_atol(header->mode, sizeof(header->mode))); archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size)); if (tar->entry_bytes_remaining < 0) { tar->entry_bytes_remaining = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Tar entry has negative size"); return (ARCHIVE_FATAL); } if (tar->entry_bytes_remaining == INT64_MAX) { /* Note: tar_atol returns INT64_MAX on overflow */ tar->entry_bytes_remaining = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Tar entry size overflow"); return (ARCHIVE_FATAL); } tar->realsize = tar->entry_bytes_remaining; archive_entry_set_size(entry, tar->entry_bytes_remaining); archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0); /* Handle the tar type flag appropriately. */ tartype = header->typeflag[0]; switch (tartype) { case '1': /* Hard link */ if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); } /* * The following may seem odd, but: Technically, tar * does not store the file type for a "hard link" * entry, only the fact that it is a hard link. So, I * leave the type zero normally. But, pax interchange * format allows hard links to have data, which * implies that the underlying entry is a regular * file. */ if (archive_entry_size(entry) > 0) archive_entry_set_filetype(entry, AE_IFREG); /* * A tricky point: Traditionally, tar readers have * ignored the size field when reading hardlink * entries, and some writers put non-zero sizes even * though the body is empty. POSIX blessed this * convention in the 1988 standard, but broke with * this tradition in 2001 by permitting hardlink * entries to store valid bodies in pax interchange * format, but not in ustar format. Since there is no * hard and fast way to distinguish pax interchange * from earlier archives (the 'x' and 'g' entries are * optional, after all), we need a heuristic. */ if (archive_entry_size(entry) == 0) { /* If the size is already zero, we're done. */ } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { /* Definitely pax extended; must obey hardlink size. */ } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR) { /* Old-style or GNU tar: we must ignore the size. */ archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; } else if (archive_read_format_tar_bid(a, 50) > 50) { /* * We don't know if it's pax: If the bid * function sees a valid ustar header * immediately following, then let's ignore * the hardlink size. */ archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; } /* * TODO: There are still two cases I'd like to handle: * = a ustar non-pax archive with a hardlink entry at * end-of-archive. (Look for block of nulls following?) * = a pax archive that has not seen any pax headers * and has an entry which is a hardlink entry storing * a body containing an uncompressed tar archive. * The first is worth addressing; I don't see any reliable * way to deal with the second possibility. */ break; case '2': /* Symlink */ archive_entry_set_filetype(entry, AE_IFLNK); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); } break; case '3': /* Character device */ archive_entry_set_filetype(entry, AE_IFCHR); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '4': /* Block device */ archive_entry_set_filetype(entry, AE_IFBLK); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '5': /* Dir */ archive_entry_set_filetype(entry, AE_IFDIR); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '6': /* FIFO device */ archive_entry_set_filetype(entry, AE_IFIFO); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case 'D': /* GNU incremental directory type */ /* * No special handling is actually required here. * It might be nice someday to preprocess the file list and * provide it to the client, though. */ archive_entry_set_filetype(entry, AE_IFDIR); break; case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ /* * As far as I can tell, this is just like a regular file * entry, except that the contents should be _appended_ to * the indicated file at the indicated offset. This may * require some API work to fully support. */ break; case 'N': /* Old GNU "long filename" entry. */ /* The body of this entry is a script for renaming * previously-extracted entries. Ugh. It will never * be supported by libarchive. */ archive_entry_set_filetype(entry, AE_IFREG); break; case 'S': /* GNU sparse files */ /* * Sparse files are really just regular files with * sparse information in the extended area. */ /* FALLTHROUGH */ case '0': /* * Enable sparse file "read" support only for regular * files and explicit GNU sparse files. However, we * don't allow non-standard file types to be sparse. */ tar->sparse_allowed = 1; /* FALLTHROUGH */ default: /* Regular file and non-standard types */ /* * Per POSIX: non-recognized types should always be * treated as regular files. */ archive_entry_set_filetype(entry, AE_IFREG); break; } return (err); } /* * Parse out header elements for "old-style" tar archives. */ static int header_old_tar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; int err = ARCHIVE_OK, err2; /* Copy filename over (to ensure null termination). */ header = (const struct archive_entry_header_ustar *)h; if (archive_entry_copy_pathname_l(entry, header->name, sizeof(header->name), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Grab rest of common fields */ err2 = header_common(a, tar, entry, h); if (err > err2) err = err2; tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Read a Mac AppleDouble-encoded blob of file metadata, * if there is one. */ static int read_mac_metadata_blob(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int64_t size; const void *data; const char *p, *name; const wchar_t *wp, *wname; (void)h; /* UNUSED */ wname = wp = archive_entry_pathname_w(entry); if (wp != NULL) { /* Find the last path element. */ for (; *wp != L'\0'; ++wp) { if (wp[0] == '/' && wp[1] != L'\0') wname = wp + 1; } /* * If last path element starts with "._", then * this is a Mac extension. */ if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0') return ARCHIVE_OK; } else { /* Find the last path element. */ name = p = archive_entry_pathname(entry); if (p == NULL) return (ARCHIVE_FAILED); for (; *p != '\0'; ++p) { if (p[0] == '/' && p[1] != '\0') name = p + 1; } /* * If last path element starts with "._", then * this is a Mac extension. */ if (name[0] != '.' || name[1] != '_' || name[2] == '\0') return ARCHIVE_OK; } /* Read the body as a Mac OS metadata blob. */ size = archive_entry_size(entry); /* * TODO: Look beyond the body here to peek at the next header. * If it's a regular header (not an extension header) * that has the wrong name, just return the current * entry as-is, without consuming the body here. * That would reduce the risk of us mis-identifying * an ordinary file that just happened to have * a name starting with "._". * * Q: Is the above idea really possible? Even * when there are GNU or pax extension entries? */ data = __archive_read_ahead(a, (size_t)size, NULL); if (data == NULL) { *unconsumed = 0; return (ARCHIVE_FATAL); } archive_entry_copy_mac_metadata(entry, data, (size_t)size); *unconsumed = (size_t)((size + 511) & ~ 511); tar_flush_unconsumed(a, unconsumed); return (tar_read_header(a, tar, entry, unconsumed)); } /* * Parse a file header for a pax extended archive entry. */ static int header_pax_global(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed); if (err != ARCHIVE_OK) return (err); err = tar_read_header(a, tar, entry, unconsumed); return (err); } static int header_pax_extensions(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err, err2; err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Parse the next header. */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* * TODO: Parse global/default options into 'entry' struct here * before handling file-specific options. * * This design (parse standard header, then overwrite with pax * extended attribute data) usually works well, but isn't ideal; * it would be better to parse the pax extended attributes first * and then skip any fields in the standard header that were * defined in the pax header. */ err2 = pax_header(a, tar, entry, tar->pax_header.s); err = err_combine(err, err2); tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Parse a file header for a Posix "ustar" archive entry. This also * handles "pax" or "extended ustar" entries. */ static int header_ustar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; struct archive_string *as; int err = ARCHIVE_OK, r; header = (const struct archive_entry_header_ustar *)h; /* Copy name into an internal buffer to ensure null-termination. */ as = &(tar->entry_pathname); if (header->prefix[0]) { archive_strncpy(as, header->prefix, sizeof(header->prefix)); if (as->s[archive_strlen(as) - 1] != '/') archive_strappend_char(as, '/'); archive_strncat(as, header->name, sizeof(header->name)); } else { archive_strncpy(as, header->name, sizeof(header->name)); } if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Handle rest of common fields. */ r = header_common(a, tar, entry, h); if (r == ARCHIVE_FATAL) return (r); if (r < err) err = r; /* Handle POSIX ustar fields. */ if (archive_entry_copy_uname_l(entry, header->uname, sizeof(header->uname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); } if (archive_entry_copy_gname_l(entry, header->gname, sizeof(header->gname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); } /* Parse out device numbers only for char and block specials. */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { archive_entry_set_rdevmajor(entry, (dev_t) tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); archive_entry_set_rdevminor(entry, (dev_t) tar_atol(header->rdevminor, sizeof(header->rdevminor))); } tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Parse the pax extended attributes record. * * Returns non-zero if there's an error in the data. */ static int pax_header(struct archive_read *a, struct tar *tar, struct archive_entry *entry, char *attr) { size_t attr_length, l, line_length; char *p; char *key, *value; struct archive_string *as; struct archive_string_conv *sconv; int err, err2; attr_length = strlen(attr); tar->pax_hdrcharset_binary = 0; archive_string_empty(&(tar->entry_gname)); archive_string_empty(&(tar->entry_linkpath)); archive_string_empty(&(tar->entry_pathname)); archive_string_empty(&(tar->entry_pathname_override)); archive_string_empty(&(tar->entry_uname)); err = ARCHIVE_OK; while (attr_length > 0) { /* Parse decimal length field at start of line. */ line_length = 0; l = attr_length; p = attr; /* Record start of line. */ while (l>0) { if (*p == ' ') { p++; l--; break; } if (*p < '0' || *p > '9') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Ignoring malformed pax extended attributes"); return (ARCHIVE_WARN); } line_length *= 10; line_length += *p - '0'; if (line_length > 999999) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Rejecting pax extended attribute > 1MB"); return (ARCHIVE_WARN); } p++; l--; } /* * Parsed length must be no bigger than available data, * at least 1, and the last character of the line must * be '\n'. */ if (line_length > attr_length || line_length < 1 || attr[line_length - 1] != '\n') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Ignoring malformed pax extended attribute"); return (ARCHIVE_WARN); } /* Null-terminate the line. */ attr[line_length - 1] = '\0'; /* Find end of key and null terminate it. */ key = p; if (key[0] == '=') return (-1); while (*p && *p != '=') ++p; if (*p == '\0') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Invalid pax extended attributes"); return (ARCHIVE_WARN); } *p = '\0'; /* Identify null-terminated 'value' portion. */ value = p + 1; /* Identify this attribute and set it in the entry. */ err2 = pax_attribute(a, tar, entry, key, value); if (err2 == ARCHIVE_FATAL) return (err2); err = err_combine(err, err2); /* Skip to next line */ attr += line_length; attr_length -= line_length; } /* * PAX format uses UTF-8 as default charset for its metadata * unless hdrcharset=BINARY is present in its header. * We apply the charset specified by the hdrcharset option only * when the hdrcharset attribute(in PAX header) is BINARY because * we respect the charset described in PAX header and BINARY also * means that metadata(filename,uname and gname) character-set * is unknown. */ if (tar->pax_hdrcharset_binary) sconv = tar->opt_sconv; else { sconv = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (sconv == NULL) return (ARCHIVE_FATAL); if (tar->compat_2x) archive_string_conversion_set_opt(sconv, SCONV_SET_OPT_UTF8_LIBARCHIVE2X); } if (archive_strlen(&(tar->entry_gname)) > 0) { if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, archive_strlen(&(tar->entry_gname)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_gname(entry, tar->entry_gname.s); } } if (archive_strlen(&(tar->entry_linkpath)) > 0) { if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_link(entry, tar->entry_linkpath.s); } } /* * Some extensions (such as the GNU sparse file extensions) * deliberately store a synthetic name under the regular 'path' * attribute and the real file name under a different attribute. * Since we're supposed to not care about the order, we * have no choice but to store all of the various filenames * we find and figure it all out afterwards. This is the * figuring out part. */ as = NULL; if (archive_strlen(&(tar->entry_pathname_override)) > 0) as = &(tar->entry_pathname_override); else if (archive_strlen(&(tar->entry_pathname)) > 0) as = &(tar->entry_pathname); if (as != NULL) { if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_pathname(entry, as->s); } } if (archive_strlen(&(tar->entry_uname)) > 0) { if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, archive_strlen(&(tar->entry_uname)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_uname(entry, tar->entry_uname.s); } } return (err); } static int pax_attribute_xattr(struct archive_entry *entry, const char *name, const char *value) { char *name_decoded; void *value_decoded; size_t value_len; if (strlen(name) < 18 || (memcmp(name, "LIBARCHIVE.xattr.", 17)) != 0) return 3; name += 17; /* URL-decode name */ name_decoded = url_decode(name); if (name_decoded == NULL) return 2; /* Base-64 decode value */ value_decoded = base64_decode(value, strlen(value), &value_len); if (value_decoded == NULL) { free(name_decoded); return 1; } archive_entry_xattr_add_entry(entry, name_decoded, value_decoded, value_len); free(name_decoded); free(value_decoded); return 0; } /* * Parse a single key=value attribute. key/value pointers are * assumed to point into reasonably long-lived storage. * * Note that POSIX reserves all-lowercase keywords. Vendor-specific * extensions should always have keywords of the form "VENDOR.attribute" * In particular, it's quite feasible to support many different * vendor extensions here. I'm using "LIBARCHIVE" for extensions * unique to this library. * * Investigate other vendor-specific extensions and see if * any of them look useful. */ static int pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const char *key, const char *value) { int64_t s; long n; int err = ARCHIVE_OK, r; if (value == NULL) value = ""; /* Disable compiler warning; do not pass * NULL pointer to strlen(). */ switch (key[0]) { case 'G': /* Reject GNU.sparse.* headers on non-regular files. */ if (strncmp(key, "GNU.sparse", 10) == 0 && !tar->sparse_allowed) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Non-regular file cannot be sparse"); return (ARCHIVE_FATAL); } /* GNU "0.0" sparse pax format. */ if (strcmp(key, "GNU.sparse.numblocks") == 0) { tar->sparse_offset = -1; tar->sparse_numbytes = -1; tar->sparse_gnu_major = 0; tar->sparse_gnu_minor = 0; } if (strcmp(key, "GNU.sparse.offset") == 0) { tar->sparse_offset = tar_atol10(value, strlen(value)); if (tar->sparse_numbytes != -1) { if (gnu_add_sparse_entry(a, tar, tar->sparse_offset, tar->sparse_numbytes) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_offset = -1; tar->sparse_numbytes = -1; } } if (strcmp(key, "GNU.sparse.numbytes") == 0) { tar->sparse_numbytes = tar_atol10(value, strlen(value)); if (tar->sparse_numbytes != -1) { if (gnu_add_sparse_entry(a, tar, tar->sparse_offset, tar->sparse_numbytes) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_offset = -1; tar->sparse_numbytes = -1; } } if (strcmp(key, "GNU.sparse.size") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } /* GNU "0.1" sparse pax format. */ if (strcmp(key, "GNU.sparse.map") == 0) { tar->sparse_gnu_major = 0; tar->sparse_gnu_minor = 1; if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK) return (ARCHIVE_WARN); } /* GNU "1.0" sparse pax format */ if (strcmp(key, "GNU.sparse.major") == 0) { tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value)); tar->sparse_gnu_pending = 1; } if (strcmp(key, "GNU.sparse.minor") == 0) { tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value)); tar->sparse_gnu_pending = 1; } if (strcmp(key, "GNU.sparse.name") == 0) { /* * The real filename; when storing sparse * files, GNU tar puts a synthesized name into * the regular 'path' attribute in an attempt * to limit confusion. ;-) */ archive_strcpy(&(tar->entry_pathname_override), value); } if (strcmp(key, "GNU.sparse.realsize") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } break; case 'L': /* Our extensions */ /* TODO: Handle arbitrary extended attributes... */ /* if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) archive_entry_set_xxxxxx(entry, value); */ if (strcmp(key, "LIBARCHIVE.creationtime") == 0) { pax_time(value, &s, &n); archive_entry_set_birthtime(entry, s, n); } if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0) pax_attribute_xattr(entry, key, value); break; case 'S': /* We support some keys used by the "star" archiver */ if (strcmp(key, "SCHILY.acl.access") == 0) { if (tar->sconv_acl == NULL) { tar->sconv_acl = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (tar->sconv_acl == NULL) return (ARCHIVE_FATAL); } r = archive_acl_parse_l(archive_entry_acl(entry), value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl); if (r != ARCHIVE_OK) { err = r; if (err == ARCHIVE_FATAL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for " "SCHILY.acl.access"); return (err); } archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Parse error: SCHILY.acl.access"); } } else if (strcmp(key, "SCHILY.acl.default") == 0) { if (tar->sconv_acl == NULL) { tar->sconv_acl = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (tar->sconv_acl == NULL) return (ARCHIVE_FATAL); } r = archive_acl_parse_l(archive_entry_acl(entry), value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, tar->sconv_acl); if (r != ARCHIVE_OK) { err = r; if (err == ARCHIVE_FATAL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for " "SCHILY.acl.default"); return (err); } archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Parse error: SCHILY.acl.default"); } } else if (strcmp(key, "SCHILY.devmajor") == 0) { archive_entry_set_rdevmajor(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.devminor") == 0) { archive_entry_set_rdevminor(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.fflags") == 0) { archive_entry_copy_fflags_text(entry, value); } else if (strcmp(key, "SCHILY.dev") == 0) { archive_entry_set_dev(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.ino") == 0) { archive_entry_set_ino(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.nlink") == 0) { archive_entry_set_nlink(entry, (unsigned) tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.realsize") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } else if (strcmp(key, "SUN.holesdata") == 0) { /* A Solaris extension for sparse. */ r = solaris_sparse_parse(a, tar, entry, value); if (r < err) { if (r == ARCHIVE_FATAL) return (r); err = r; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Parse error: SUN.holesdata"); } } break; case 'a': if (strcmp(key, "atime") == 0) { pax_time(value, &s, &n); archive_entry_set_atime(entry, s, n); } break; case 'c': if (strcmp(key, "ctime") == 0) { pax_time(value, &s, &n); archive_entry_set_ctime(entry, s, n); } else if (strcmp(key, "charset") == 0) { /* TODO: Publish charset information in entry. */ } else if (strcmp(key, "comment") == 0) { /* TODO: Publish comment in entry. */ } break; case 'g': if (strcmp(key, "gid") == 0) { archive_entry_set_gid(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "gname") == 0) { archive_strcpy(&(tar->entry_gname), value); } break; case 'h': if (strcmp(key, "hdrcharset") == 0) { if (strcmp(value, "BINARY") == 0) /* Binary mode. */ tar->pax_hdrcharset_binary = 1; else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0) tar->pax_hdrcharset_binary = 0; } break; case 'l': /* pax interchange doesn't distinguish hardlink vs. symlink. */ if (strcmp(key, "linkpath") == 0) { archive_strcpy(&(tar->entry_linkpath), value); } break; case 'm': if (strcmp(key, "mtime") == 0) { pax_time(value, &s, &n); archive_entry_set_mtime(entry, s, n); } break; case 'p': if (strcmp(key, "path") == 0) { archive_strcpy(&(tar->entry_pathname), value); } break; case 'r': /* POSIX has reserved 'realtime.*' */ break; case 's': /* POSIX has reserved 'security.*' */ /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ if (strcmp(key, "size") == 0) { /* "size" is the size of the data in the entry. */ tar->entry_bytes_remaining = tar_atol10(value, strlen(value)); /* * But, "size" is not necessarily the size of * the file on disk; if this is a sparse file, * the disk size may have already been set from * GNU.sparse.realsize or GNU.sparse.size or * an old GNU header field or SCHILY.realsize * or .... */ if (tar->realsize < 0) { archive_entry_set_size(entry, tar->entry_bytes_remaining); tar->realsize = tar->entry_bytes_remaining; } } break; case 'u': if (strcmp(key, "uid") == 0) { archive_entry_set_uid(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "uname") == 0) { archive_strcpy(&(tar->entry_uname), value); } break; } return (err); } /* * parse a decimal time value, which may include a fractional portion */ static void pax_time(const char *p, int64_t *ps, long *pn) { char digit; int64_t s; unsigned long l; int sign; int64_t limit, last_digit_limit; limit = INT64_MAX / 10; last_digit_limit = INT64_MAX % 10; s = 0; sign = 1; if (*p == '-') { sign = -1; p++; } while (*p >= '0' && *p <= '9') { digit = *p - '0'; if (s > limit || (s == limit && digit > last_digit_limit)) { s = INT64_MAX; break; } s = (s * 10) + digit; ++p; } *ps = s * sign; /* Calculate nanoseconds. */ *pn = 0; if (*p != '.') return; l = 100000000UL; do { ++p; if (*p >= '0' && *p <= '9') *pn += (*p - '0') * l; else break; } while (l /= 10); } /* * Parse GNU tar header */ static int header_gnutar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { const struct archive_entry_header_gnutar *header; int64_t t; int err = ARCHIVE_OK; /* * GNU header is like POSIX ustar, except 'prefix' is * replaced with some other fields. This also means the * filename is stored as in old-style archives. */ /* Grab fields common to all tar variants. */ err = header_common(a, tar, entry, h); if (err == ARCHIVE_FATAL) return (err); /* Copy filename over (to ensure null termination). */ header = (const struct archive_entry_header_gnutar *)h; if (archive_entry_copy_pathname_l(entry, header->name, sizeof(header->name), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Fields common to ustar and GNU */ /* XXX Can the following be factored out since it's common * to ustar and gnu tar? Is it okay to move it down into * header_common, perhaps? */ if (archive_entry_copy_uname_l(entry, header->uname, sizeof(header->uname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); } if (archive_entry_copy_gname_l(entry, header->gname, sizeof(header->gname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); } /* Parse out device numbers only for char and block specials */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { archive_entry_set_rdevmajor(entry, (dev_t) tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); archive_entry_set_rdevminor(entry, (dev_t) tar_atol(header->rdevminor, sizeof(header->rdevminor))); } else archive_entry_set_rdev(entry, 0); tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); /* Grab GNU-specific fields. */ t = tar_atol(header->atime, sizeof(header->atime)); if (t > 0) archive_entry_set_atime(entry, t, 0); t = tar_atol(header->ctime, sizeof(header->ctime)); if (t > 0) archive_entry_set_ctime(entry, t, 0); if (header->realsize[0] != 0) { tar->realsize = tar_atol(header->realsize, sizeof(header->realsize)); archive_entry_set_size(entry, tar->realsize); } if (header->sparse[0].offset[0] != 0) { if (gnu_sparse_old_read(a, tar, header, unconsumed) != ARCHIVE_OK) return (ARCHIVE_FATAL); } else { if (header->isextended[0] != 0) { /* XXX WTF? XXX */ } } return (err); } static int gnu_add_sparse_entry(struct archive_read *a, struct tar *tar, int64_t offset, int64_t remaining) { struct sparse_block *p; p = (struct sparse_block *)malloc(sizeof(*p)); if (p == NULL) { archive_set_error(&a->archive, ENOMEM, "Out of memory"); return (ARCHIVE_FATAL); } memset(p, 0, sizeof(*p)); if (tar->sparse_last != NULL) tar->sparse_last->next = p; else tar->sparse_list = p; tar->sparse_last = p; if (remaining < 0 || offset < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data"); return (ARCHIVE_FATAL); } p->offset = offset; p->remaining = remaining; return (ARCHIVE_OK); } static void gnu_clear_sparse_list(struct tar *tar) { struct sparse_block *p; while (tar->sparse_list != NULL) { p = tar->sparse_list; tar->sparse_list = p->next; free(p); } tar->sparse_last = NULL; } /* * GNU tar old-format sparse data. * * GNU old-format sparse data is stored in a fixed-field * format. Offset/size values are 11-byte octal fields (same * format as 'size' field in ustart header). These are * stored in the header, allocating subsequent header blocks * as needed. Extending the header in this way is a pretty * severe POSIX violation; this design has earned GNU tar a * lot of criticism. */ static int gnu_sparse_old_read(struct archive_read *a, struct tar *tar, const struct archive_entry_header_gnutar *header, size_t *unconsumed) { ssize_t bytes_read; const void *data; struct extended { struct gnu_sparse sparse[21]; char isextended[1]; char padding[7]; }; const struct extended *ext; if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK) return (ARCHIVE_FATAL); if (header->isextended[0] == 0) return (ARCHIVE_OK); do { tar_flush_unconsumed(a, unconsumed); data = __archive_read_ahead(a, 512, &bytes_read); if (bytes_read < 0) return (ARCHIVE_FATAL); if (bytes_read < 512) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated tar archive " "detected while reading sparse file data"); return (ARCHIVE_FATAL); } *unconsumed = 512; ext = (const struct extended *)data; if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK) return (ARCHIVE_FATAL); } while (ext->isextended[0] != 0); if (tar->sparse_list != NULL) tar->entry_offset = tar->sparse_list->offset; return (ARCHIVE_OK); } static int gnu_sparse_old_parse(struct archive_read *a, struct tar *tar, const struct gnu_sparse *sparse, int length) { while (length > 0 && sparse->offset[0] != 0) { if (gnu_add_sparse_entry(a, tar, tar_atol(sparse->offset, sizeof(sparse->offset)), tar_atol(sparse->numbytes, sizeof(sparse->numbytes))) != ARCHIVE_OK) return (ARCHIVE_FATAL); sparse++; length--; } return (ARCHIVE_OK); } /* * GNU tar sparse format 0.0 * * Beginning with GNU tar 1.15, sparse files are stored using * information in the pax extended header. The GNU tar maintainers * have gone through a number of variations in the process of working * out this scheme; fortunately, they're all numbered. * * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to * store offset/size for each block. The repeated instances of these * latter fields violate the pax specification (which frowns on * duplicate keys), so this format was quickly replaced. */ /* * GNU tar sparse format 0.1 * * This version replaced the offset/numbytes attributes with * a single "map" attribute that stored a list of integers. This * format had two problems: First, the "map" attribute could be very * long, which caused problems for some implementations. More * importantly, the sparse data was lost when extracted by archivers * that didn't recognize this extension. */ static int gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p) { const char *e; int64_t offset = -1, size = -1; for (;;) { e = p; while (*e != '\0' && *e != ',') { if (*e < '0' || *e > '9') return (ARCHIVE_WARN); e++; } if (offset < 0) { offset = tar_atol10(p, e - p); if (offset < 0) return (ARCHIVE_WARN); } else { size = tar_atol10(p, e - p); if (size < 0) return (ARCHIVE_WARN); if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) return (ARCHIVE_FATAL); offset = -1; } if (*e == '\0') return (ARCHIVE_OK); p = e + 1; } } /* * GNU tar sparse format 1.0 * * The idea: The offset/size data is stored as a series of base-10 * ASCII numbers prepended to the file data, so that dearchivers that * don't support this format will extract the block map along with the * data and a separate post-process can restore the sparseness. * * Unfortunately, GNU tar 1.16 had a bug that added unnecessary * padding to the body of the file when using this format. GNU tar * 1.17 corrected this bug without bumping the version number, so * it's not possible to support both variants. This code supports * the later variant at the expense of not supporting the former. * * This variant also replaced GNU.sparse.size with GNU.sparse.realsize * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. */ /* * Read the next line from the input, and parse it as a decimal * integer followed by '\n'. Returns positive integer value or * negative on error. */ static int64_t gnu_sparse_10_atol(struct archive_read *a, struct tar *tar, int64_t *remaining, size_t *unconsumed) { int64_t l, limit, last_digit_limit; const char *p; ssize_t bytes_read; int base, digit; base = 10; limit = INT64_MAX / base; last_digit_limit = INT64_MAX % base; /* * Skip any lines starting with '#'; GNU tar specs * don't require this, but they should. */ do { bytes_read = readline(a, tar, &p, (ssize_t)tar_min(*remaining, 100), unconsumed); if (bytes_read <= 0) return (ARCHIVE_FATAL); *remaining -= bytes_read; } while (p[0] == '#'); l = 0; while (bytes_read > 0) { if (*p == '\n') return (l); if (*p < '0' || *p >= '0' + base) return (ARCHIVE_WARN); digit = *p - '0'; if (l > limit || (l == limit && digit > last_digit_limit)) l = INT64_MAX; /* Truncate on overflow. */ else l = (l * base) + digit; p++; bytes_read--; } /* TODO: Error message. */ return (ARCHIVE_WARN); } /* * Returns length (in bytes) of the sparse data description * that was read. */ static ssize_t gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) { ssize_t bytes_read; int entries; int64_t offset, size, to_skip, remaining; /* Clear out the existing sparse list. */ gnu_clear_sparse_list(tar); remaining = tar->entry_bytes_remaining; /* Parse entries. */ entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (entries < 0) return (ARCHIVE_FATAL); /* Parse the individual entries. */ while (entries-- > 0) { /* Parse offset/size */ offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (offset < 0) return (ARCHIVE_FATAL); size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (size < 0) return (ARCHIVE_FATAL); /* Add a new sparse entry. */ if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) return (ARCHIVE_FATAL); } /* Skip rest of block... */ tar_flush_unconsumed(a, unconsumed); bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining); to_skip = 0x1ff & -bytes_read; if (to_skip != __archive_read_consume(a, to_skip)) return (ARCHIVE_FATAL); return ((ssize_t)(bytes_read + to_skip)); } /* * Solaris pax extension for a sparse file. This is recorded with the * data and hole pairs. The way recording sparse information by Solaris' * pax simply indicates where data and sparse are, so the stored contents * consist of both data and hole. */ static int solaris_sparse_parse(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const char *p) { const char *e; int64_t start, end; int hole = 1; (void)entry; /* UNUSED */ end = 0; if (*p == ' ') p++; else return (ARCHIVE_WARN); for (;;) { e = p; while (*e != '\0' && *e != ' ') { if (*e < '0' || *e > '9') return (ARCHIVE_WARN); e++; } start = end; end = tar_atol10(p, e - p); if (end < 0) return (ARCHIVE_WARN); if (start < end) { if (gnu_add_sparse_entry(a, tar, start, end - start) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_last->hole = hole; } if (*e == '\0') return (ARCHIVE_OK); p = e + 1; hole = hole == 0; } } /*- * Convert text->integer. * * Traditional tar formats (including POSIX) specify base-8 for * all of the standard numeric fields. This is a significant limitation * in practice: * = file size is limited to 8GB * = rdevmajor and rdevminor are limited to 21 bits * = uid/gid are limited to 21 bits * * There are two workarounds for this: * = pax extended headers, which use variable-length string fields * = GNU tar and STAR both allow either base-8 or base-256 in * most fields. The high bit is set to indicate base-256. * * On read, this implementation supports both extensions. */ static int64_t tar_atol(const char *p, size_t char_cnt) { /* * Technically, GNU tar considers a field to be in base-256 * only if the first byte is 0xff or 0x80. */ if (*p & 0x80) return (tar_atol256(p, char_cnt)); return (tar_atol8(p, char_cnt)); } /* * Note that this implementation does not (and should not!) obey * locale settings; you cannot simply substitute strtol here, since * it does obey locale. */ static int64_t tar_atol_base_n(const char *p, size_t char_cnt, int base) { int64_t l, maxval, limit, last_digit_limit; int digit, sign; maxval = INT64_MAX; limit = INT64_MAX / base; last_digit_limit = INT64_MAX % base; /* the pointer will not be dereferenced if char_cnt is zero * due to the way the && operator is evaulated. */ while (char_cnt != 0 && (*p == ' ' || *p == '\t')) { p++; char_cnt--; } sign = 1; if (char_cnt != 0 && *p == '-') { sign = -1; p++; char_cnt--; maxval = INT64_MIN; limit = -(INT64_MIN / base); last_digit_limit = INT64_MIN % base; } l = 0; if (char_cnt != 0) { digit = *p - '0'; while (digit >= 0 && digit < base && char_cnt != 0) { if (l>limit || (l == limit && digit > last_digit_limit)) { return maxval; /* Truncate on overflow. */ } l = (l * base) + digit; digit = *++p - '0'; char_cnt--; } } return (sign < 0) ? -l : l; } static int64_t tar_atol8(const char *p, size_t char_cnt) { return tar_atol_base_n(p, char_cnt, 8); } static int64_t tar_atol10(const char *p, size_t char_cnt) { return tar_atol_base_n(p, char_cnt, 10); } /* * Parse a base-256 integer. This is just a variable-length * twos-complement signed binary value in big-endian order, except * that the high-order bit is ignored. The values here can be up to * 12 bytes, so we need to be careful about overflowing 64-bit * (8-byte) integers. * * This code unashamedly assumes that the local machine uses 8-bit * bytes and twos-complement arithmetic. */ static int64_t tar_atol256(const char *_p, size_t char_cnt) { uint64_t l; const unsigned char *p = (const unsigned char *)_p; unsigned char c, neg; /* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */ c = *p; if (c & 0x40) { neg = 0xff; c |= 0x80; l = ~ARCHIVE_LITERAL_ULL(0); } else { neg = 0; c &= 0x7f; l = 0; } /* If more than 8 bytes, check that we can ignore * high-order bits without overflow. */ while (char_cnt > sizeof(int64_t)) { --char_cnt; if (c != neg) return neg ? INT64_MIN : INT64_MAX; c = *++p; } /* c is first byte that fits; if sign mismatch, return overflow */ if ((c ^ neg) & 0x80) { return neg ? INT64_MIN : INT64_MAX; } /* Accumulate remaining bytes. */ while (--char_cnt > 0) { l = (l << 8) | c; c = *++p; } l = (l << 8) | c; /* Return signed twos-complement value. */ return (int64_t)(l); } /* * Returns length of line (including trailing newline) * or negative on error. 'start' argument is updated to * point to first character of line. This avoids copying * when possible. */ static ssize_t readline(struct archive_read *a, struct tar *tar, const char **start, ssize_t limit, size_t *unconsumed) { ssize_t bytes_read; ssize_t total_size = 0; const void *t; const char *s; void *p; tar_flush_unconsumed(a, unconsumed); t = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read <= 0) return (ARCHIVE_FATAL); s = t; /* Start of line? */ p = memchr(t, '\n', bytes_read); /* If we found '\n' in the read buffer, return pointer to that. */ if (p != NULL) { bytes_read = 1 + ((const char *)p) - s; if (bytes_read > limit) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Line too long"); return (ARCHIVE_FATAL); } *unconsumed = bytes_read; *start = s; return (bytes_read); } *unconsumed = bytes_read; /* Otherwise, we need to accumulate in a line buffer. */ for (;;) { if (total_size + bytes_read > limit) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Line too long"); return (ARCHIVE_FATAL); } if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate working buffer"); return (ARCHIVE_FATAL); } memcpy(tar->line.s + total_size, t, bytes_read); tar_flush_unconsumed(a, unconsumed); total_size += bytes_read; /* If we found '\n', clean up and return. */ if (p != NULL) { *start = tar->line.s; return (total_size); } /* Read some more. */ t = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read <= 0) return (ARCHIVE_FATAL); s = t; /* Start of line? */ p = memchr(t, '\n', bytes_read); /* If we found '\n', trim the read. */ if (p != NULL) { bytes_read = 1 + ((const char *)p) - s; } *unconsumed = bytes_read; } } /* * base64_decode - Base64 decode * * This accepts most variations of base-64 encoding, including: * * with or without line breaks * * with or without the final group padded with '=' or '_' characters * (The most economical Base-64 variant does not pad the last group and * omits line breaks; RFC1341 used for MIME requires both.) */ static char * base64_decode(const char *s, size_t len, size_t *out_len) { static const unsigned char digits[64] = { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N', 'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b', 'c','d','e','f','g','h','i','j','k','l','m','n','o','p', 'q','r','s','t','u','v','w','x','y','z','0','1','2','3', '4','5','6','7','8','9','+','/' }; static unsigned char decode_table[128]; char *out, *d; const unsigned char *src = (const unsigned char *)s; /* If the decode table is not yet initialized, prepare it. */ if (decode_table[digits[1]] != 1) { unsigned i; memset(decode_table, 0xff, sizeof(decode_table)); for (i = 0; i < sizeof(digits); i++) decode_table[digits[i]] = i; } /* Allocate enough space to hold the entire output. */ /* Note that we may not use all of this... */ out = (char *)malloc(len - len / 4 + 1); if (out == NULL) { *out_len = 0; return (NULL); } d = out; while (len > 0) { /* Collect the next group of (up to) four characters. */ int v = 0; int group_size = 0; while (group_size < 4 && len > 0) { /* '=' or '_' padding indicates final group. */ if (*src == '=' || *src == '_') { len = 0; break; } /* Skip illegal characters (including line breaks) */ if (*src > 127 || *src < 32 || decode_table[*src] == 0xff) { len--; src++; continue; } v <<= 6; v |= decode_table[*src++]; len --; group_size++; } /* Align a short group properly. */ v <<= 6 * (4 - group_size); /* Unpack the group we just collected. */ switch (group_size) { case 4: d[2] = v & 0xff; /* FALLTHROUGH */ case 3: d[1] = (v >> 8) & 0xff; /* FALLTHROUGH */ case 2: d[0] = (v >> 16) & 0xff; break; case 1: /* this is invalid! */ break; } d += group_size * 3 / 4; } *out_len = d - out; return (out); } static char * url_decode(const char *in) { char *out, *d; const char *s; out = (char *)malloc(strlen(in) + 1); if (out == NULL) return (NULL); for (s = in, d = out; *s != '\0'; ) { if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') { /* Try to convert % escape */ int digit1 = tohex(s[1]); int digit2 = tohex(s[2]); if (digit1 >= 0 && digit2 >= 0) { /* Looks good, consume three chars */ s += 3; /* Convert output */ *d++ = ((digit1 << 4) | digit2); continue; } /* Else fall through and treat '%' as normal char */ } *d++ = *s++; } *d = '\0'; return (out); } static int tohex(int c) { if (c >= '0' && c <= '9') return (c - '0'); else if (c >= 'A' && c <= 'F') return (c - 'A' + 10); else if (c >= 'a' && c <= 'f') return (c - 'a' + 10); else return (-1); }