Index: head/lib/libarchive/archive_private.h =================================================================== --- head/lib/libarchive/archive_private.h (revision 144702) +++ head/lib/libarchive/archive_private.h (revision 144703) @@ -1,242 +1,244 @@ /*- * Copyright (c) 2003-2004 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ARCHIVE_PRIVATE_H_INCLUDED #define ARCHIVE_PRIVATE_H_INCLUDED #include #include "archive.h" #include "archive_string.h" #define ARCHIVE_WRITE_MAGIC (0xb0c5c0deU) #define ARCHIVE_READ_MAGIC (0xdeb0c5U) struct archive { /* * The magic/state values are used to sanity-check the * client's usage. 
If an API function is called at a * ridiculous time, or the client passes us an invalid * pointer, these values allow me to catch that. */ unsigned magic; unsigned state; struct archive_entry *entry; uid_t user_uid; /* UID of current user. */ /* Dev/ino of the archive being read/written. */ dev_t skip_file_dev; ino_t skip_file_ino; /* Utility: Pointer to a block of nulls. */ const char *nulls; size_t null_length; /* * Used by archive_read_data() to track blocks and copy * data to client buffers, filling gaps with zero bytes. */ const char *read_data_block; off_t read_data_offset; off_t read_data_output_offset; size_t read_data_remaining; /* Callbacks to open/read/write/close archive stream. */ archive_open_callback *client_opener; archive_read_callback *client_reader; archive_write_callback *client_writer; archive_close_callback *client_closer; void *client_data; /* * Blocking information. Note that bytes_in_last_block is * misleadingly named; I should find a better name. These * control the final output from all compressors, including * compression_none. */ int bytes_per_block; int bytes_in_last_block; /* * These control whether data within a gzip/bzip2 compressed * stream gets padded or not. If pad_uncompressed is set, * the data will be padded to a full block before being * compressed. The pad_uncompressed_byte determines the value * that will be used for padding. Note that these have no * effect on compression "none." */ int pad_uncompressed; int pad_uncompressed_byte; /* TODO: Support this. */ /* Position in UNCOMPRESSED data stream. */ off_t file_position; /* Position in COMPRESSED data stream. */ off_t raw_position; /* File offset of beginning of most recently-read header. */ off_t header_position; /* * Detection functions for decompression: bid functions are * given a block of data from the beginning of the stream and * can bid on whether or not they support the data stream. 
* General guideline: bid the number of bits that you actually * test, e.g., 16 if you test a 2-byte magic value. The * highest bidder will have their init function invoked, which * can set up pointers to specific handlers. * * On write, the client just invokes an archive_write_set function * which sets up the data here directly. */ int compression_code; /* Currently active compression. */ const char *compression_name; struct { int (*bid)(const void *buff, size_t); int (*init)(struct archive *, const void *buff, size_t); } decompressors[4]; /* Read/write data stream (with compression). */ void *compression_data; /* Data for (de)compressor. */ int (*compression_init)(struct archive *); /* Initialize. */ int (*compression_finish)(struct archive *); int (*compression_write)(struct archive *, const void *, size_t); /* * Read uses a peek/consume I/O model: the decompression code * returns a pointer to the requested block and advances the * file position only when requested by a consume call. This * reduces copying and also simplifies look-ahead for format * detection. */ ssize_t (*compression_read_ahead)(struct archive *, const void **, size_t request); ssize_t (*compression_read_consume)(struct archive *, size_t); /* * Format detection is mostly the same as compression * detection, with two significant differences: The bidders * use the read_ahead calls above to examine the stream rather * than having the supervisor hand them a block of data to * examine, and the auction is repeated for every header. * Winning bidders should set the archive_format and * archive_format_name appropriately. Bid routines should * check archive_format and decline to bid if the format of * the last header was incompatible. * * Again, write support is considerably simpler because there's * no need for an auction. 
*/ int archive_format; const char *archive_format_name; struct archive_format_descriptor { int (*bid)(struct archive *); int (*read_header)(struct archive *, struct archive_entry *); int (*read_data)(struct archive *, const void **, size_t *, off_t *); + int (*read_data_skip)(struct archive *); int (*cleanup)(struct archive *); void *format_data; /* Format-specific data for readers. */ } formats[4]; struct archive_format_descriptor *format; /* Active format. */ /* * Storage for format-specific data. Note that there can be * multiple format readers active at one time, so we need to * allow for multiple format readers to have their data * available. The pformat_data slot here is the solution: on * read, it is guaranteed to always point to a void* variable * that the format can use. */ void **pformat_data; /* Pointer to current format_data. */ void *format_data; /* Used by writers. */ /* * Pointers to format-specific functions for writing. They're * initialized by archive_write_set_format_XXX() calls. */ int (*format_init)(struct archive *); /* Only used on write. */ int (*format_finish)(struct archive *); int (*format_finish_entry)(struct archive *); int (*format_write_header)(struct archive *, struct archive_entry *); int (*format_write_data)(struct archive *, const void *buff, size_t); /* * Various information needed by archive_extract. */ struct extract *extract; void (*extract_progress)(void *); void *extract_progress_user_data; void (*cleanup_archive_extract)(struct archive *); int archive_error_number; const char *error; struct archive_string error_string; }; /* * Utility function to format a USTAR header into a buffer. If * "strict" is set, this tries to create the absolutely most portable * version of a ustar header. If "strict" is set to 0, then it will * relax certain requirements. 
*/ int __archive_write_format_header_ustar(struct archive *, char buff[512], struct archive_entry *, int tartype, int strict); #define ARCHIVE_STATE_ANY 0xFFFFU #define ARCHIVE_STATE_NEW 1U #define ARCHIVE_STATE_HEADER 2U #define ARCHIVE_STATE_DATA 4U #define ARCHIVE_STATE_EOF 8U #define ARCHIVE_STATE_CLOSED 0x10U #define ARCHIVE_STATE_FATAL 0x8000U /* Check magic value and state; exit if it isn't valid. */ void __archive_check_magic(struct archive *, unsigned magic, unsigned state, const char *func); #define archive_check_magic(a,m,s) \ __archive_check_magic((a), (m), (s), __func__) int __archive_read_register_format(struct archive *a, void *format_data, int (*bid)(struct archive *), int (*read_header)(struct archive *, struct archive_entry *), int (*read_data)(struct archive *, const void **, size_t *, off_t *), + int (*read_data_skip)(struct archive *), int (*cleanup)(struct archive *)); int __archive_read_register_compression(struct archive *a, int (*bid)(const void *, size_t), int (*init)(struct archive *, const void *, size_t)); void __archive_errx(int retvalue, const char *msg); #define err_combine(a,b) ((a) < (b) ? (a) : (b)) #endif Index: head/lib/libarchive/archive_read.c =================================================================== --- head/lib/libarchive/archive_read.c (revision 144702) +++ head/lib/libarchive/archive_read.c (revision 144703) @@ -1,560 +1,568 @@ /*- * Copyright (c) 2003-2004 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * This file contains the "essential" portions of the read API, that * is, stuff that will probably always be used by any client that * actually needs to read an archive. Optional pieces have been, as * far as possible, separated out into separate files to avoid * needlessly bloating statically-linked clients. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "archive.h" #include "archive_entry.h" #include "archive_private.h" static int choose_decompressor(struct archive *, const void*, size_t); static int choose_format(struct archive *); /* * Allocate, initialize and return a struct archive object. */ struct archive * archive_read_new(void) { struct archive *a; char *nulls; a = malloc(sizeof(*a)); memset(a, 0, sizeof(*a)); a->user_uid = geteuid(); a->magic = ARCHIVE_READ_MAGIC; a->bytes_per_block = ARCHIVE_DEFAULT_BYTES_PER_BLOCK; a->null_length = 1024; nulls = malloc(a->null_length); memset(nulls, 0, a->null_length); a->nulls = nulls; a->state = ARCHIVE_STATE_NEW; a->entry = archive_entry_new(); /* We always support uncompressed archives. */ archive_read_support_compression_none((struct archive*)a); return (a); } /* * Set the block size. 
*/ /* int archive_read_set_bytes_per_block(struct archive *a, int bytes_per_block) { archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW); if (bytes_per_block < 1) bytes_per_block = 1; a->bytes_per_block = bytes_per_block; return (0); } */ /* * Open the archive */ int archive_read_open(struct archive *a, void *client_data, archive_open_callback *opener, archive_read_callback *reader, archive_close_callback *closer) { const void *buffer; ssize_t bytes_read; int high_bidder; int e; archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW); if (reader == NULL) __archive_errx(1, "No reader function provided to archive_read_open"); a->client_reader = reader; a->client_opener = opener; a->client_closer = closer; a->client_data = client_data; a->state = ARCHIVE_STATE_HEADER; /* Open data source. */ if (a->client_opener != NULL) { e =(a->client_opener)(a, a->client_data); if (e != 0) return (e); } /* Read first block now for format detection. */ bytes_read = (a->client_reader)(a, a->client_data, &buffer); /* client_reader should have already set error information. */ if (bytes_read < 0) return (ARCHIVE_FATAL); /* An empty archive is a serious error. */ if (bytes_read == 0) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Empty input file"); return (ARCHIVE_FATAL); } /* Select a decompression routine. */ high_bidder = choose_decompressor(a, buffer, bytes_read); if (high_bidder < 0) return (ARCHIVE_FATAL); /* Initialize decompression routine with the first block of data. */ e = (a->decompressors[high_bidder].init)(a, buffer, bytes_read); return (e); } /* * Allow each registered decompression routine to bid on whether it * wants to handle this stream. Return index of winning bidder. 
*/ static int choose_decompressor(struct archive *a, const void *buffer, size_t bytes_read) { int decompression_slots, i, bid, best_bid, best_bid_slot; decompression_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]); best_bid = -1; best_bid_slot = -1; for (i = 0; i < decompression_slots; i++) { if (a->decompressors[i].bid) { bid = (a->decompressors[i].bid)(buffer, bytes_read); if ((bid > best_bid) || (best_bid_slot < 0)) { best_bid = bid; best_bid_slot = i; } } } /* * There were no bidders; this is a serious programmer error * and demands a quick and definitive abort. */ if (best_bid_slot < 0) __archive_errx(1, "No decompressors were registered; you " "must call at least one " "archive_read_support_compression_XXX function in order " "to successfully read an archive."); /* * There were bidders, but no non-zero bids; this means we can't * support this stream. */ if (best_bid < 1) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Unrecognized archive format"); return (ARCHIVE_FATAL); } return (best_bid_slot); } /* * Read header of next entry. */ int archive_read_next_header(struct archive *a, struct archive_entry **entryp) { struct archive_entry *entry; int slot, ret; archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA); *entryp = NULL; entry = a->entry; archive_entry_clear(entry); archive_string_empty(&a->error_string); /* * If client didn't consume entire data, skip any remainder * (This is especially important for GNU incremental directories.) */ if (a->state == ARCHIVE_STATE_DATA) { ret = archive_read_data_skip(a); if (ret == ARCHIVE_EOF) { archive_set_error(a, EIO, "Premature end-of-file."); a->state = ARCHIVE_STATE_FATAL; return (ARCHIVE_FATAL); } + if (ret != ARCHIVE_OK) + return (ret); } /* Record start-of-header. 
*/ a->header_position = a->file_position; slot = choose_format(a); if (slot < 0) { a->state = ARCHIVE_STATE_FATAL; return (ARCHIVE_FATAL); } a->format = &(a->formats[slot]); a->pformat_data = &(a->format->format_data); ret = (a->format->read_header)(a, entry); /* * EOF and FATAL are persistent at this layer. By * modifying the state, we guarantee that future calls to * read a header or read data will fail. */ switch (ret) { case ARCHIVE_EOF: a->state = ARCHIVE_STATE_EOF; break; case ARCHIVE_OK: a->state = ARCHIVE_STATE_DATA; break; case ARCHIVE_WARN: a->state = ARCHIVE_STATE_DATA; break; case ARCHIVE_RETRY: break; case ARCHIVE_FATAL: a->state = ARCHIVE_STATE_FATAL; break; } *entryp = entry; a->read_data_output_offset = 0; a->read_data_remaining = 0; return (ret); } /* * Allow each registered format to bid on whether it wants to handle * the next entry. Return index of winning bidder. */ static int choose_format(struct archive *a) { int slots; int i; int bid, best_bid; int best_bid_slot; slots = sizeof(a->formats) / sizeof(a->formats[0]); best_bid = -1; best_bid_slot = -1; /* Set up a->format and a->pformat_data for convenience of bidders. */ a->format = &(a->formats[0]); for (i = 0; i < slots; i++, a->format++) { if (a->format->bid) { a->pformat_data = &(a->format->format_data); bid = (a->format->bid)(a); if (bid == ARCHIVE_FATAL) return (ARCHIVE_FATAL); if ((bid > best_bid) || (best_bid_slot < 0)) { best_bid = bid; best_bid_slot = i; } } } /* * There were no bidders; this is a serious programmer error * and demands a quick and definitive abort. */ if (best_bid_slot < 0) __archive_errx(1, "No formats were registered; you must " "invoke at least one archive_read_support_format_XXX " "function in order to successfully read an archive."); /* * There were bidders, but no non-zero bids; this means we * can't support this stream. 
*/ if (best_bid < 1) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Unrecognized archive format"); return (ARCHIVE_FATAL); } return (best_bid_slot); } /* * Return the file offset (within the uncompressed data stream) where * the last header started. */ int64_t archive_read_header_position(struct archive *a) { return (a->header_position); } /* * Read data from an archive entry, using a read(2)-style interface. * This is a convenience routine that just calls * archive_read_data_block and copies the results into the client * buffer, filling any gaps with zero bytes. Clients using this * API can be completely ignorant of sparse-file issues; sparse files * will simply be padded with nulls. * * DO NOT intermingle calls to this function and archive_read_data_block * to read a single entry body. */ ssize_t archive_read_data(struct archive *a, void *buff, size_t s) { off_t remaining; char *dest; size_t bytes_read; size_t len; int r; bytes_read = 0; dest = buff; while (s > 0) { if (a->read_data_remaining <= 0) { r = archive_read_data_block(a, (const void **)&a->read_data_block, &a->read_data_remaining, &a->read_data_offset); if (r == ARCHIVE_EOF) return (bytes_read); if (r != ARCHIVE_OK) return (r); } if (a->read_data_offset < a->read_data_output_offset) { /* NOTE(review): zero-fill path. Unlike the copy path below, dest is not advanced after the memset; the direction of the offset comparison also looks reversed for gap filling -- confirm against the intended sparse-file behavior. */ remaining = a->read_data_output_offset - a->read_data_offset; if (remaining > (off_t)s) remaining = (off_t)s; len = (size_t)remaining; memset(dest, 0, len); a->read_data_output_offset += len; s -= len; bytes_read += len; } else { len = a->read_data_remaining; if (len > s) len = s; memcpy(dest, a->read_data_block, len); s -= len; a->read_data_remaining -= len; a->read_data_output_offset += len; a->read_data_offset += len; dest += len; bytes_read += len; } } /* NOTE(review): reached once the request is fully satisfied (s == 0); this returns ARCHIVE_OK although the ARCHIVE_EOF path above returns bytes_read -- confirm the intended read(2)-style return value. */ return (ARCHIVE_OK); } /* * Skip over all remaining data in this entry. 
*/ int archive_read_data_skip(struct archive *a) { int r; const void *buff; ssize_t size; off_t offset; archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA); - while ((r = archive_read_data_block(a, &buff, &size, &offset)) == - ARCHIVE_OK) - ; + if (a->format->read_data_skip != NULL) + r = (a->format->read_data_skip)(a); + else { + while ((r = archive_read_data_block(a, &buff, &size, &offset)) + == ARCHIVE_OK) + ; + } if (r == ARCHIVE_EOF) r = ARCHIVE_OK; a->state = ARCHIVE_STATE_HEADER; return (r); } /* * Read the next block of entry data from the archive. * This is a zero-copy interface; the client receives a pointer, * size, and file offset of the next available block of data. * * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if * the end of entry is encountered. */ int archive_read_data_block(struct archive *a, const void **buff, size_t *size, off_t *offset) { archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA); if (a->format->read_data == NULL) { archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, "Internal error: " "No format_read_data_block function registered"); return (ARCHIVE_FATAL); } return (a->format->read_data)(a, buff, size, offset); } /* * Close the file and release most resources. * * Be careful: client might just call read_new and then read_finish. * Don't assume we actually read anything or performed any non-trivial * initialization. */ int archive_read_close(struct archive *a) { archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY); a->state = ARCHIVE_STATE_CLOSED; /* Call cleanup functions registered by optional components. */ if (a->cleanup_archive_extract != NULL) (a->cleanup_archive_extract)(a); /* TODO: Finish the format processing. */ /* Close the input machinery. */ if (a->compression_finish != NULL) (a->compression_finish)(a); return (ARCHIVE_OK); } /* * Release memory and other resources. 
*/ void archive_read_finish(struct archive *a) { int i; int slots; archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY); if (a->state != ARCHIVE_STATE_CLOSED) archive_read_close(a); /* Cleanup format-specific data. */ slots = sizeof(a->formats) / sizeof(a->formats[0]); for (i = 0; i < slots; i++) { a->pformat_data = &(a->formats[i].format_data); if (a->formats[i].cleanup) (a->formats[i].cleanup)(a); } /* Casting a pointer to int allows us to remove 'const.' */ free((void *)(uintptr_t)(const void *)a->nulls); archive_string_free(&a->error_string); if (a->entry) archive_entry_free(a->entry); a->magic = 0; free(a); } /* * Used internally by read format handlers to register their bid and * initialization functions. */ int __archive_read_register_format(struct archive *a, void *format_data, int (*bid)(struct archive *), int (*read_header)(struct archive *, struct archive_entry *), int (*read_data)(struct archive *, const void **, size_t *, off_t *), + int (*read_data_skip)(struct archive *), int (*cleanup)(struct archive *)) { int i, number_slots; archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW); number_slots = sizeof(a->formats) / sizeof(a->formats[0]); for (i = 0; i < number_slots; i++) { if (a->formats[i].bid == bid) return (ARCHIVE_WARN); /* We've already installed */ if (a->formats[i].bid == NULL) { a->formats[i].bid = bid; a->formats[i].read_header = read_header; a->formats[i].read_data = read_data; + a->formats[i].read_data_skip = read_data_skip; a->formats[i].cleanup = cleanup; a->formats[i].format_data = format_data; return (ARCHIVE_OK); } } __archive_errx(1, "Not enough slots for format registration"); return (ARCHIVE_FATAL); /* Never actually called. */ } /* * Used internally by decompression routines to register their bid and * initialization functions. 
*/ int __archive_read_register_compression(struct archive *a, int (*bid)(const void *, size_t), int (*init)(struct archive *, const void *, size_t)) { int i, number_slots; archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW); number_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]); for (i = 0; i < number_slots; i++) { if (a->decompressors[i].bid == bid) return (ARCHIVE_OK); /* We've already installed */ if (a->decompressors[i].bid == NULL) { a->decompressors[i].bid = bid; a->decompressors[i].init = init; return (ARCHIVE_OK); } } __archive_errx(1, "Not enough slots for compression registration"); return (ARCHIVE_FATAL); /* Never actually executed. */ } Index: head/lib/libarchive/archive_read_support_format_cpio.c =================================================================== --- head/lib/libarchive/archive_read_support_format_cpio.c (revision 144702) +++ head/lib/libarchive/archive_read_support_format_cpio.c (revision 144703) @@ -1,587 +1,588 @@ /*- * Copyright (c) 2003-2004 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #include #include /* #include */ /* See archive_platform.h */ #include #include #include #include "archive.h" #include "archive_entry.h" #include "archive_private.h" struct cpio_bin_header { unsigned char c_magic[2]; unsigned char c_dev[2]; unsigned char c_ino[2]; unsigned char c_mode[2]; unsigned char c_uid[2]; unsigned char c_gid[2]; unsigned char c_nlink[2]; unsigned char c_rdev[2]; unsigned char c_mtime[4]; unsigned char c_namesize[2]; unsigned char c_filesize[4]; }; struct cpio_odc_header { char c_magic[6]; char c_dev[6]; char c_ino[6]; char c_mode[6]; char c_uid[6]; char c_gid[6]; char c_nlink[6]; char c_rdev[6]; char c_mtime[11]; char c_namesize[6]; char c_filesize[11]; }; struct cpio_newc_header { char c_magic[6]; char c_ino[8]; char c_mode[8]; char c_uid[8]; char c_gid[8]; char c_nlink[8]; char c_mtime[8]; char c_filesize[8]; char c_devmajor[8]; char c_devminor[8]; char c_rdevmajor[8]; char c_rdevminor[8]; char c_namesize[8]; char c_crc[8]; }; struct links_entry { struct links_entry *next; struct links_entry *previous; int links; dev_t dev; ino_t ino; char *name; }; #define CPIO_MAGIC 0x13141516 struct cpio { int magic; int (*read_header)(struct archive *, struct cpio *, struct stat *, size_t *, size_t *); struct links_entry *links_head; struct archive_string entry_name; struct archive_string entry_linkname; off_t entry_bytes_remaining; off_t entry_offset; off_t entry_padding; }; static int64_t 
atol16(const char *, unsigned); static int64_t atol8(const char *, unsigned); static int archive_read_format_cpio_bid(struct archive *); static int archive_read_format_cpio_cleanup(struct archive *); static int archive_read_format_cpio_read_data(struct archive *, const void **, size_t *, off_t *); static int archive_read_format_cpio_read_header(struct archive *, struct archive_entry *); static int be4(const unsigned char *); static int header_bin_be(struct archive *, struct cpio *, struct stat *, size_t *, size_t *); static int header_bin_le(struct archive *, struct cpio *, struct stat *, size_t *, size_t *); static int header_newc(struct archive *, struct cpio *, struct stat *, size_t *, size_t *); static int header_odc(struct archive *, struct cpio *, struct stat *, size_t *, size_t *); static int le4(const unsigned char *); static void record_hardlink(struct cpio *cpio, struct archive_entry *entry, const struct stat *st); int archive_read_support_format_cpio(struct archive *a) { struct cpio *cpio; int r; cpio = malloc(sizeof(*cpio)); memset(cpio, 0, sizeof(*cpio)); cpio->magic = CPIO_MAGIC; r = __archive_read_register_format(a, cpio, archive_read_format_cpio_bid, archive_read_format_cpio_read_header, archive_read_format_cpio_read_data, + NULL, archive_read_format_cpio_cleanup); if (r != ARCHIVE_OK) free(cpio); return (ARCHIVE_OK); } static int archive_read_format_cpio_bid(struct archive *a) { int bid, bytes_read; const void *h; const unsigned char *p; struct cpio *cpio; cpio = *(a->pformat_data); bid = 0; bytes_read = (a->compression_read_ahead)(a, &h, 6); /* Convert error code into error return. */ if (bytes_read < 0) return ((int)bytes_read); if (bytes_read < 6) return (-1); p = h; if (memcmp(p, "070707", 6) == 0) { /* ASCII cpio archive (odc, POSIX.1) */ cpio->read_header = header_odc; bid += 48; /* * XXX TODO: More verification; Could check that only octal * digits appear in appropriate header locations. 
XXX */ } else if (memcmp(p, "070701", 6) == 0) { /* ASCII cpio archive (SVR4 without CRC) */ cpio->read_header = header_newc; bid += 48; /* * XXX TODO: More verification; Could check that only hex * digits appear in appropriate header locations. XXX */ } else if (memcmp(p, "070702", 6) == 0) { /* ASCII cpio archive (SVR4 with CRC) */ /* XXX TODO: Flag that we should check the CRC. XXX */ cpio->read_header = header_newc; bid += 48; /* * XXX TODO: More verification; Could check that only hex * digits appear in appropriate header locations. XXX */ } else if (p[0] * 256 + p[1] == 070707) { /* big-endian binary cpio archives */ cpio->read_header = header_bin_be; bid += 16; /* Is more verification possible here? */ } else if (p[0] + p[1] * 256 == 070707) { /* little-endian binary cpio archives */ cpio->read_header = header_bin_le; bid += 16; /* Is more verification possible here? */ } else return (ARCHIVE_WARN); return (bid); } static int archive_read_format_cpio_read_header(struct archive *a, struct archive_entry *entry) { struct stat st; struct cpio *cpio; size_t bytes; const void *h; size_t namelength; size_t name_pad; int r; memset(&st, 0, sizeof(st)); cpio = *(a->pformat_data); r = (cpio->read_header(a, cpio, &st, &namelength, &name_pad)); if (r != ARCHIVE_OK) return (r); /* Assign all of the 'stat' fields at once. */ archive_entry_copy_stat(entry, &st); /* Read name from buffer. */ bytes = (a->compression_read_ahead)(a, &h, namelength + name_pad); if (bytes < namelength + name_pad) return (ARCHIVE_FATAL); (a->compression_read_consume)(a, namelength + name_pad); archive_strncpy(&cpio->entry_name, h, namelength); archive_entry_set_pathname(entry, cpio->entry_name.s); cpio->entry_offset = 0; /* If this is a symlink, read the link contents. 
*/ if (S_ISLNK(st.st_mode)) { bytes = (a->compression_read_ahead)(a, &h, cpio->entry_bytes_remaining); if ((off_t)bytes < cpio->entry_bytes_remaining) return (ARCHIVE_FATAL); (a->compression_read_consume)(a, cpio->entry_bytes_remaining); archive_strncpy(&cpio->entry_linkname, h, cpio->entry_bytes_remaining); archive_entry_set_symlink(entry, cpio->entry_linkname.s); cpio->entry_bytes_remaining = 0; } /* Compare name to "TRAILER!!!" to test for end-of-archive. */ if (namelength == 11 && strcmp(h,"TRAILER!!!")==0) { /* TODO: Store file location of start of block. */ archive_set_error(a, 0, NULL); return (ARCHIVE_EOF); } /* Detect and record hardlinks to previously-extracted entries. */ record_hardlink(cpio, entry, &st); return (ARCHIVE_OK); } static int archive_read_format_cpio_read_data(struct archive *a, const void **buff, size_t *size, off_t *offset) { ssize_t bytes_read; struct cpio *cpio; cpio = *(a->pformat_data); if (cpio->entry_bytes_remaining > 0) { bytes_read = (a->compression_read_ahead)(a, buff, 1); if (bytes_read <= 0) return (ARCHIVE_FATAL); if (bytes_read > cpio->entry_bytes_remaining) bytes_read = cpio->entry_bytes_remaining; *size = bytes_read; *offset = cpio->entry_offset; cpio->entry_offset += bytes_read; cpio->entry_bytes_remaining -= bytes_read; (a->compression_read_consume)(a, bytes_read); return (ARCHIVE_OK); } else { while (cpio->entry_padding > 0) { bytes_read = (a->compression_read_ahead)(a, buff, 1); if (bytes_read <= 0) return (ARCHIVE_FATAL); if (bytes_read > cpio->entry_padding) bytes_read = cpio->entry_padding; (a->compression_read_consume)(a, bytes_read); cpio->entry_padding -= bytes_read; } *buff = NULL; *size = 0; *offset = cpio->entry_offset; return (ARCHIVE_EOF); } } static int header_newc(struct archive *a, struct cpio *cpio, struct stat *st, size_t *namelength, size_t *name_pad) { const void *h; const struct cpio_newc_header *header; size_t bytes; a->archive_format = ARCHIVE_FORMAT_CPIO; a->archive_format_name = "ASCII cpio (SVR4 
with no CRC)"; /* Read fixed-size portion of header. */ bytes = (a->compression_read_ahead)(a, &h, sizeof(struct cpio_newc_header)); if (bytes < sizeof(struct cpio_newc_header)) return (ARCHIVE_FATAL); (a->compression_read_consume)(a, sizeof(struct cpio_newc_header)); /* Parse out hex fields into struct stat. */ header = h; st->st_ino = atol16(header->c_ino, sizeof(header->c_ino)); st->st_mode = atol16(header->c_mode, sizeof(header->c_mode)); st->st_uid = atol16(header->c_uid, sizeof(header->c_uid)); st->st_gid = atol16(header->c_gid, sizeof(header->c_gid)); st->st_nlink = atol16(header->c_nlink, sizeof(header->c_nlink)); st->st_mtime = atol16(header->c_mtime, sizeof(header->c_mtime)); *namelength = atol16(header->c_namesize, sizeof(header->c_namesize)); /* Pad name to 2 more than a multiple of 4. */ *name_pad = (2 - *namelength) & 3; /* * Note: entry_bytes_remaining is at least 64 bits and * therefore guaranteed to be big enough for a 33-bit file * size. struct stat.st_size may only be 32 bits, so * assigning there first could lose information. */ cpio->entry_bytes_remaining = atol16(header->c_filesize, sizeof(header->c_filesize)); st->st_size = cpio->entry_bytes_remaining; /* Pad file contents to a multiple of 4. */ cpio->entry_padding = 3 & -cpio->entry_bytes_remaining; return (ARCHIVE_OK); } static int header_odc(struct archive *a, struct cpio *cpio, struct stat *st, size_t *namelength, size_t *name_pad) { const void *h; const struct cpio_odc_header *header; size_t bytes; a->archive_format = ARCHIVE_FORMAT_CPIO; a->archive_format_name = "POSIX octet-oriented cpio"; /* Read fixed-size portion of header. */ bytes = (a->compression_read_ahead)(a, &h, sizeof(struct cpio_odc_header)); if (bytes < sizeof(struct cpio_odc_header)) return (ARCHIVE_FATAL); (a->compression_read_consume)(a, sizeof(struct cpio_odc_header)); /* Parse out octal fields into struct stat. 
*/ header = h; st->st_dev = atol8(header->c_dev, sizeof(header->c_dev)); st->st_ino = atol8(header->c_ino, sizeof(header->c_ino)); st->st_mode = atol8(header->c_mode, sizeof(header->c_mode)); st->st_uid = atol8(header->c_uid, sizeof(header->c_uid)); st->st_gid = atol8(header->c_gid, sizeof(header->c_gid)); st->st_nlink = atol8(header->c_nlink, sizeof(header->c_nlink)); st->st_rdev = atol8(header->c_rdev, sizeof(header->c_rdev)); st->st_mtime = atol8(header->c_mtime, sizeof(header->c_mtime)); *namelength = atol8(header->c_namesize, sizeof(header->c_namesize)); *name_pad = 0; /* No padding of filename. */ /* * Note: entry_bytes_remaining is at least 64 bits and * therefore gauranteed to be big enough for a 33-bit file * size. struct stat.st_size may only be 32 bits, so * assigning there first could lose information. */ cpio->entry_bytes_remaining = atol8(header->c_filesize, sizeof(header->c_filesize)); st->st_size = cpio->entry_bytes_remaining; cpio->entry_padding = 0; return (ARCHIVE_OK); } static int header_bin_le(struct archive *a, struct cpio *cpio, struct stat *st, size_t *namelength, size_t *name_pad) { const void *h; const struct cpio_bin_header *header; size_t bytes; a->archive_format = ARCHIVE_FORMAT_CPIO; a->archive_format_name = "cpio (little-endian binary)"; /* Read fixed-size portion of header. */ bytes = (a->compression_read_ahead)(a, &h, sizeof(struct cpio_bin_header)); if (bytes < sizeof(struct cpio_bin_header)) return (ARCHIVE_FATAL); (a->compression_read_consume)(a, sizeof(struct cpio_bin_header)); /* Parse out binary fields into struct stat. 
*/ header = h; st->st_dev = header->c_dev[0] + header->c_dev[1] * 256; st->st_ino = header->c_ino[0] + header->c_ino[1] * 256; st->st_mode = header->c_mode[0] + header->c_mode[1] * 256; st->st_uid = header->c_uid[0] + header->c_uid[1] * 256; st->st_gid = header->c_gid[0] + header->c_gid[1] * 256; st->st_nlink = header->c_nlink[0] + header->c_nlink[1] * 256; st->st_rdev = header->c_rdev[0] + header->c_rdev[1] * 256; st->st_mtime = le4(header->c_mtime); *namelength = header->c_namesize[0] + header->c_namesize[1] * 256; *name_pad = *namelength & 1; /* Pad to even. */ cpio->entry_bytes_remaining = le4(header->c_filesize); st->st_size = cpio->entry_bytes_remaining; cpio->entry_padding = cpio->entry_bytes_remaining & 1; /* Pad to even. */ return (ARCHIVE_OK); } static int header_bin_be(struct archive *a, struct cpio *cpio, struct stat *st, size_t *namelength, size_t *name_pad) { const void *h; const struct cpio_bin_header *header; size_t bytes; a->archive_format = ARCHIVE_FORMAT_CPIO; a->archive_format_name = "cpio (big-endian binary)"; /* Read fixed-size portion of header. */ bytes = (a->compression_read_ahead)(a, &h, sizeof(struct cpio_bin_header)); if (bytes < sizeof(struct cpio_bin_header)) return (ARCHIVE_FATAL); (a->compression_read_consume)(a, sizeof(struct cpio_bin_header)); /* Parse out binary fields into struct stat. */ header = h; st->st_dev = header->c_dev[0] * 256 + header->c_dev[1]; st->st_ino = header->c_ino[0] * 256 + header->c_ino[1]; st->st_mode = header->c_mode[0] * 256 + header->c_mode[1]; st->st_uid = header->c_uid[0] * 256 + header->c_uid[1]; st->st_gid = header->c_gid[0] * 256 + header->c_gid[1]; st->st_nlink = header->c_nlink[0] * 256 + header->c_nlink[1]; st->st_rdev = header->c_rdev[0] * 256 + header->c_rdev[1]; st->st_mtime = be4(header->c_mtime); *namelength = header->c_namesize[0] * 256 + header->c_namesize[1]; *name_pad = *namelength & 1; /* Pad to even. 
*/ cpio->entry_bytes_remaining = be4(header->c_filesize); st->st_size = cpio->entry_bytes_remaining; cpio->entry_padding = cpio->entry_bytes_remaining & 1; /* Pad to even. */ return (ARCHIVE_OK); } static int archive_read_format_cpio_cleanup(struct archive *a) { struct cpio *cpio; cpio = *(a->pformat_data); /* Free inode->name map */ while (cpio->links_head != NULL) { struct links_entry *lp = cpio->links_head->next; if (cpio->links_head->name) free(cpio->links_head->name); free(cpio->links_head); cpio->links_head = lp; } free(cpio); *(a->pformat_data) = NULL; return (ARCHIVE_OK); } static int le4(const unsigned char *p) { return ((p[0]<<16) + (p[1]<<24) + (p[2]<<0) + (p[3]<<8)); } static int be4(const unsigned char *p) { return (p[0] + (p[1]<<8) + (p[2]<<16) + (p[3]<<24)); } /* * Note that this implementation does not (and should not!) obey * locale settings; you cannot simply substitute strtol here, since * it does obey locale. */ static int64_t atol8(const char *p, unsigned char_cnt) { int64_t l; int digit; l = 0; while (char_cnt-- > 0) { if (*p >= '0' && *p <= '7') digit = *p - '0'; else return (l); p++; l <<= 3; l |= digit; } return (l); } static int64_t atol16(const char *p, unsigned char_cnt) { int64_t l; int digit; l = 0; while (char_cnt-- > 0) { if (*p >= 'a' && *p <= 'f') digit = *p - 'a' + 10; else if (*p >= 'A' && *p <= 'F') digit = *p - 'A' + 10; else if (*p >= '0' && *p <= '9') digit = *p - '0'; else return (l); p++; l <<= 4; l |= digit; } return (l); } static void record_hardlink(struct cpio *cpio, struct archive_entry *entry, const struct stat *st) { struct links_entry *le; /* * First look in the list of multiply-linked files. If we've * already dumped it, convert this entry to a hard link entry. 
*/ for (le = cpio->links_head; le; le = le->next) { if (le->dev == st->st_dev && le->ino == st->st_ino) { archive_entry_set_hardlink(entry, le->name); if (--le->links <= 0) { if (le->previous != NULL) le->previous->next = le->next; if (le->next != NULL) le->next->previous = le->previous; if (cpio->links_head == le) cpio->links_head = le->next; free(le); } return; } } le = malloc(sizeof(struct links_entry)); if (cpio->links_head != NULL) cpio->links_head->previous = le; le->next = cpio->links_head; le->previous = NULL; cpio->links_head = le; le->dev = st->st_dev; le->ino = st->st_ino; le->links = st->st_nlink - 1; le->name = strdup(archive_entry_pathname(entry)); } Index: head/lib/libarchive/archive_read_support_format_iso9660.c =================================================================== --- head/lib/libarchive/archive_read_support_format_iso9660.c (revision 144702) +++ head/lib/libarchive/archive_read_support_format_iso9660.c (revision 144703) @@ -1,1000 +1,1001 @@ /*- * Copyright (c) 2003-2004 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #include #include /* #include */ /* See archive_platform.h */ #include #include #include #include #include #include "archive.h" #include "archive_entry.h" #include "archive_private.h" #include "archive_string.h" /* * An overview of ISO 9660 format: * * Each disk is laid out as follows: * * 32k reserved for private use * * Volume descriptor table. Each volume descriptor * is 2k and specifies basic format information. * The "Primary Volume Descriptor" (PVD) is defined by the * standard and should always be present; other volume * descriptors include various vendor-specific extensions. * * Files and directories. Each file/dir is specified by * an "extent" (starting sector and length in bytes). * Dirs are just files with directory records packed one * after another. The PVD contains a single dir entry * specifying the location of the root directory. Everything * else follows from there. * * This module works by first reading the volume descriptors, then * building a list of directory entries, sorted by starting * sector. At each step, I look for the earliest dir entry that * hasn't yet been read, seek forward to that location and read * that entry. If it's a dir, I slurp in the new dir entries and * add them to the heap; if it's a regular file, I return the * corresponding archive_entry and wait for the client to request * the file body. 
This strategy allows us to read most compliant
 * CDs with a single pass through the data, as required by libarchive.
 */

/*
 * Structure of on-disk PVD.
 *
 * Every member is a raw byte array so the struct can be overlaid on
 * the bytes read from disk; numeric fields are decoded with toi().
 */
struct iso9660_primary_volume_descriptor {
	unsigned char	type[1];
	char	id[5];
	unsigned char	version[1];
	char	reserved1[1];
	char	system_id[32];
	char	volume_id[32];
	char	reserved2[8];
	char	volume_space_size[8];
	char	reserved3[32];
	char	volume_set_size[4];
	char	volume_sequence_number[4];
	char	logical_block_size[4];
	char	path_table_size[8];
	char	type_1_path_table[4];
	char	opt_type_1_path_table[4];
	char	type_m_path_table[4];
	char	opt_type_m_path_table[4];
	char	root_directory_record[34];
	char	volume_set_id[128];
	char	publisher_id[128];
	char	preparer_id[128];
	char	application_id[128];
	char	copyright_file_id[37];
	char	abstract_file_id[37];
	char	bibliographic_file_id[37];
	char	creation_date[17];
	char	modification_date[17];
	char	expiration_date[17];
	char	effective_date[17];
	char	file_structure_version[1];
	char	reserved4[1];
	char	application_data[512];
};

/*
 * Structure of an on-disk directory record.
 *
 * length[0] is the size of the whole record; the name occupies
 * name_len[0] bytes starting at name[].
 */
struct iso9660_directory_record {
	unsigned char length[1];
	unsigned char ext_attr_length[1];
	unsigned char extent[8];
	unsigned char size[8];
	char date[7];
	unsigned char flags[1];
	unsigned char file_unit_size[1];
	unsigned char interleave[1];
	unsigned char volume_sequence_number[4];
	unsigned char name_len[1];
	char name[1];
};


/*
 * Our private data.
 */

/*
 * In-memory storage for a directory record.
 *
 * refcount counts the children that name this record as their
 * parent; a record is only freed once that count is zero.
 */
struct file_info {
	struct file_info	*parent;
	int		 refcount;
	uint64_t	 offset;  /* Offset on disk. */
	uint64_t	 size;	/* File size in bytes. */
	uint64_t	 ce_offset; /* Offset of CE */
	uint64_t	 ce_size; /* Size of CE */
	time_t		 mtime;	/* File last modified time. */
	time_t		 atime;	/* File last accessed time. */
	time_t		 ctime;	/* File creation time. */
	mode_t		 mode;
	uid_t		 uid;
	gid_t		 gid;
	ino_t		 inode;
	int		 nlinks;
	char		*name; /* Null-terminated filename.
*/ struct archive_string symlink; }; struct iso9660 { int magic; #define ISO9660_MAGIC 0x96609660 int bid; /* If non-zero, return this as our bid. */ struct archive_string pathname; char seenRockridge; /* Set true if RR extensions are used. */ unsigned char suspOffset; uint64_t previous_offset; uint64_t previous_size; struct archive_string previous_pathname; /* TODO: Make this a heap for fast inserts and deletions. */ struct file_info **pending_files; int pending_files_allocated; int pending_files_used; uint64_t current_position; ssize_t logical_block_size; off_t entry_sparse_offset; ssize_t entry_bytes_remaining; }; static void add_entry(struct iso9660 *iso9660, struct file_info *file); static int archive_read_format_iso9660_bid(struct archive *); static int archive_read_format_iso9660_cleanup(struct archive *); static int archive_read_format_iso9660_read_data(struct archive *, const void **, size_t *, off_t *); static int archive_read_format_iso9660_read_header(struct archive *, struct archive_entry *); static const char *build_pathname(struct archive_string *, struct file_info *); static void dump_isodirrec(FILE *, const struct iso9660_directory_record *); static time_t isodate17(const void *); static time_t isodate7(const void *); static int isPVD(struct iso9660 *, const char *); static struct file_info *next_entry(struct iso9660 *); static int next_entry_seek(struct archive *a, struct iso9660 *iso9660, struct file_info **pfile); static struct file_info * parse_file_info(struct iso9660 *iso9660, struct file_info *parent, const struct iso9660_directory_record *isodirrec); static void parse_rockridge(struct iso9660 *iso9660, struct file_info *file, const unsigned char *start, const unsigned char *end); static void release_file(struct iso9660 *, struct file_info *); static int toi(const void *p, int n); int archive_read_support_format_iso9660(struct archive *a) { struct iso9660 *iso9660; int r; iso9660 = malloc(sizeof(*iso9660)); memset(iso9660, 0, 
sizeof(*iso9660)); iso9660->magic = ISO9660_MAGIC; iso9660->bid = -1; /* We haven't yet bid. */ r = __archive_read_register_format(a, iso9660, archive_read_format_iso9660_bid, archive_read_format_iso9660_read_header, archive_read_format_iso9660_read_data, + NULL, archive_read_format_iso9660_cleanup); if (r != ARCHIVE_OK) { free(iso9660); return (r); } return (ARCHIVE_OK); } static int archive_read_format_iso9660_bid(struct archive *a) { struct iso9660 *iso9660; ssize_t bytes_read; const void *h; const char *p; iso9660 = *(a->pformat_data); if (iso9660->bid >= 0) return (iso9660->bid); /* * Skip the first 32k (reserved area) and get the first * 8 sectors of the volume descriptor table. Of course, * if the I/O layer gives us more, we'll take it. */ bytes_read = (a->compression_read_ahead)(a, &h, 32768 + 8*2048); if (bytes_read < 32768 + 8*2048) return (iso9660->bid = -1); p = (const char *)h; /* Skip the reserved area. */ bytes_read -= 32768; p += 32768; /* Check each volume descriptor to locate the PVD. */ for (; bytes_read > 2048; bytes_read -= 2048, p += 2048) { iso9660->bid = isPVD(iso9660, p); if (iso9660->bid > 0) return (iso9660->bid); if (*p == '\xff') /* End-of-volume-descriptor marker. */ break; } /* We didn't find a valid PVD; return a bid of zero. */ iso9660->bid = 0; return (iso9660->bid); } static int isPVD(struct iso9660 *iso9660, const char *h) { const struct iso9660_primary_volume_descriptor *voldesc; struct file_info *file; if (h[0] != 1) return (0); if (memcmp(h+1, "CD001", 5) != 0) return (0); voldesc = (const struct iso9660_primary_volume_descriptor *)h; iso9660->logical_block_size = toi(&voldesc->logical_block_size, 2); /* Store the root directory in the pending list. 
*/ file = parse_file_info(iso9660, NULL, (struct iso9660_directory_record *)&voldesc->root_directory_record); add_entry(iso9660, file); return (48); } static int archive_read_format_iso9660_read_header(struct archive *a, struct archive_entry *entry) { struct stat st; struct iso9660 *iso9660; struct file_info *file; ssize_t bytes_read; int r; iso9660 = *(a->pformat_data); if (iso9660->seenRockridge) { a->archive_format = ARCHIVE_FORMAT_ISO9660_ROCKRIDGE; a->archive_format_name = "ISO9660 with Rockridge extensions"; } else { a->archive_format = ARCHIVE_FORMAT_ISO9660; a->archive_format_name = "ISO9660"; } /* Get the next entry that appears after the current offset. */ r = next_entry_seek(a, iso9660, &file); if (r != ARCHIVE_OK) return (r); iso9660->entry_bytes_remaining = file->size; iso9660->entry_sparse_offset = 0; /* Offset for sparse-file-aware clients. */ /* Set up the entry structure with information about this entry. */ memset(&st, 0, sizeof(st)); st.st_mode = file->mode; st.st_uid = file->uid; st.st_gid = file->gid; st.st_nlink = file->nlinks; st.st_ino = file->inode; st.st_mtime = file->mtime; st.st_ctime = file->ctime; st.st_atime = file->atime; st.st_size = iso9660->entry_bytes_remaining; archive_entry_copy_stat(entry, &st); archive_string_empty(&iso9660->pathname); archive_entry_set_pathname(entry, build_pathname(&iso9660->pathname, file)); if (file->symlink.s != NULL) archive_entry_set_symlink(entry, file->symlink.s); /* If this entry points to the same data as the previous * entry, convert this into a hardlink to that entry. * But don't bother for zero-length files. 
*/ if (file->offset == iso9660->previous_offset && file->size == iso9660->previous_size && file->size > 0) { archive_entry_set_hardlink(entry, iso9660->previous_pathname.s); iso9660->entry_bytes_remaining = 0; iso9660->entry_sparse_offset = 0; release_file(iso9660, file); return (ARCHIVE_OK); } /* If the offset is before our current position, we can't * seek backwards to extract it, so issue a warning. */ if (file->offset < iso9660->current_position) { archive_set_error(a, ARCHIVE_ERRNO_MISC, "Ignoring out-of-order file"); iso9660->entry_bytes_remaining = 0; iso9660->entry_sparse_offset = 0; release_file(iso9660, file); return (ARCHIVE_WARN); } iso9660->previous_size = file->size; iso9660->previous_offset = file->offset; archive_strcpy(&iso9660->previous_pathname, iso9660->pathname.s); /* If this is a directory, read in all of the entries right now. */ if (S_ISDIR(st.st_mode)) { while(iso9660->entry_bytes_remaining > 0) { const void *block; const unsigned char *p; ssize_t step = iso9660->logical_block_size; if (step > iso9660->entry_bytes_remaining) step = iso9660->entry_bytes_remaining; bytes_read = (a->compression_read_ahead)(a, &block, step); if (bytes_read < step) { archive_set_error(a, ARCHIVE_ERRNO_MISC, "Failed to read full block when scanning ISO9660 directory list"); release_file(iso9660, file); return (ARCHIVE_FATAL); } if (bytes_read > step) bytes_read = step; (a->compression_read_consume)(a, bytes_read); iso9660->current_position += bytes_read; iso9660->entry_bytes_remaining -= bytes_read; for (p = block; *p != 0 && p < (const unsigned char *)block + bytes_read; p += *p) { const struct iso9660_directory_record *dr = (const struct iso9660_directory_record *)p; struct file_info *child; /* Skip '.' entry. */ if (dr->name_len[0] == 1 && dr->name[0] == '\0') continue; /* Skip '..' entry. 
*/ if (dr->name_len[0] == 1 && dr->name[0] == '\001') continue; child = parse_file_info(iso9660, file, dr); add_entry(iso9660, child); } } } release_file(iso9660, file); return (ARCHIVE_OK); } static int archive_read_format_iso9660_read_data(struct archive *a, const void **buff, size_t *size, off_t *offset) { ssize_t bytes_read; struct iso9660 *iso9660; iso9660 = *(a->pformat_data); if (iso9660->entry_bytes_remaining <= 0) { *buff = NULL; *size = 0; *offset = iso9660->entry_sparse_offset; return (ARCHIVE_EOF); } bytes_read = (a->compression_read_ahead)(a, buff, 1); if (bytes_read <= 0) return (ARCHIVE_FATAL); if (bytes_read > iso9660->entry_bytes_remaining) bytes_read = iso9660->entry_bytes_remaining; *size = bytes_read; *offset = iso9660->entry_sparse_offset; iso9660->entry_sparse_offset += bytes_read; iso9660->entry_bytes_remaining -= bytes_read; iso9660->current_position += bytes_read; (a->compression_read_consume)(a, bytes_read); return (ARCHIVE_OK); } static int archive_read_format_iso9660_cleanup(struct archive *a) { struct iso9660 *iso9660; struct file_info *file; iso9660 = *(a->pformat_data); while ((file = next_entry(iso9660)) != NULL) release_file(iso9660, file); archive_string_free(&iso9660->pathname); archive_string_free(&iso9660->previous_pathname); free(iso9660); *(a->pformat_data) = NULL; return (ARCHIVE_OK); } /* * This routine parses a single ISO directory record, makes sense * of any extensions, and stores the result in memory. */ static struct file_info * parse_file_info(struct iso9660 *iso9660, struct file_info *parent, const struct iso9660_directory_record *isodirrec) { struct file_info *file; /* TODO: Sanity check that name_len doesn't exceed length, etc. */ /* Create a new file entry and copy data from the ISO dir record. 
*/
	/* NOTE(review): neither allocation below is checked for failure. */
	file = malloc(sizeof(*file));
	memset(file, 0, sizeof(*file));
	file->parent = parent;
	/* The child keeps its parent alive; see release_file(). */
	if (parent != NULL)
		parent->refcount++;
	/* The extent is a block number; convert it to a byte offset. */
	file->offset = toi(isodirrec->extent, 4)
	    * iso9660->logical_block_size;
	file->size = toi(isodirrec->size, 4);
	/* Plain ISO9660 stores a single timestamp; use it for all three. */
	file->mtime = isodate7(isodirrec->date);
	file->ctime = file->atime = file->mtime;
	file->name = malloc(isodirrec->name_len[0] + 1);
	memcpy(file->name, isodirrec->name, isodirrec->name_len[0]);
	file->name[(int)isodirrec->name_len[0]] = '\0';
	/* Flag bit 0x02 marks a directory. */
	if (isodirrec->flags[0] & 0x02)
		file->mode = S_IFDIR | 0700;
	else
		file->mode = S_IFREG | 0400;

	/* Rockridge extensions overwrite information from above. */
	{
		const unsigned char *rr_start, *rr_end;
		rr_end = (const unsigned char *)isodirrec
		    + isodirrec->length[0];
		rr_start = isodirrec->name + isodirrec->name_len[0];
		/* An even-length name is followed by one pad byte. */
		if ((isodirrec->name_len[0] & 1) == 0)
			rr_start++;
		rr_start += iso9660->suspOffset;
		parse_rockridge(iso9660, file, rr_start, rr_end);
	}

	/* DEBUGGING: Warn about attributes I don't yet fully support. */
	if ((isodirrec->flags[0] & ~0x02) != 0) {
		fprintf(stderr, "\n ** Unrecognized flag: ");
		dump_isodirrec(stderr, isodirrec);
		fprintf(stderr, "\n");
	} else if (toi(isodirrec->volume_sequence_number, 2) != 1) {
		fprintf(stderr, "\n ** Unrecognized sequence number: ");
		dump_isodirrec(stderr, isodirrec);
		fprintf(stderr, "\n");
	} else if (isodirrec->file_unit_size[0] != 0) {
		fprintf(stderr, "\n ** Unexpected file unit size: ");
		dump_isodirrec(stderr, isodirrec);
		fprintf(stderr, "\n");
	} else if (isodirrec->interleave[0] != 0) {
		fprintf(stderr, "\n ** Unexpected interleave: ");
		dump_isodirrec(stderr, isodirrec);
		fprintf(stderr, "\n");
	} else if (isodirrec->ext_attr_length[0] != 0) {
		fprintf(stderr, "\n ** Unexpected extended attribute length: ");
		dump_isodirrec(stderr, isodirrec);
		fprintf(stderr, "\n");
	}

	return (file);
}

/*
 * Append a file record to the list of entries still to be visited,
 * growing the list when it is full.
 */
static void
add_entry(struct iso9660 *iso9660, struct file_info *file)
{
	/* Expand our pending files list as necessary.
*/ if (iso9660->pending_files_used >= iso9660->pending_files_allocated) { struct file_info **new_pending_files; int new_size = iso9660->pending_files_allocated * 2; if (new_size < 1024) new_size = 1024; new_pending_files = malloc(new_size * sizeof(new_pending_files[0])); memcpy(new_pending_files, iso9660->pending_files, iso9660->pending_files_allocated * sizeof(new_pending_files[0])); if (iso9660->pending_files != NULL) free(iso9660->pending_files); iso9660->pending_files = new_pending_files; iso9660->pending_files_allocated = new_size; } iso9660->pending_files[iso9660->pending_files_used++] = file; } static void parse_rockridge(struct iso9660 *iso9660, struct file_info *file, const unsigned char *p, const unsigned char *end) { (void)iso9660; /* UNUSED */ while (p + 4 < end /* Enough space for another entry. */ && p[0] >= 'A' && p[0] <= 'Z' /* Sanity-check 1st char of name. */ && p[1] >= 'A' && p[1] <= 'Z' /* Sanity-check 2nd char of name. */ && p + p[2] <= end) { /* Sanity-check length. */ const unsigned char *data = p + 4; int data_length = p[2] - 4; int version = p[3]; /* * Yes, each 'if' here does test p[0] again. * Otherwise, the fall-through handling to catch * unsupported extensions doesn't work. */ switch(p[0]) { case 'C': if (p[0] == 'C' && p[1] == 'E' && version == 1) { /* * CE extension comprises: * 8 byte sector containing extension * 8 byte offset w/in above sector * 8 byte length of continuation */ file->ce_offset = toi(data, 4) * iso9660->logical_block_size + toi(data + 8, 4); file->ce_size = toi(data + 16, 4); break; } /* FALLTHROUGH */ case 'N': if (p[0] == 'N' && p[1] == 'M' && version == 1 && *data == 0) { /* NM extension with flag byte == 0 */ /* * NM extension comprises: * one byte flag * rest is long name */ /* TODO: Obey flags. */ char *old_name = file->name; data++; /* Skip flag byte. 
*/ data_length--; file->name = malloc(data_length + 1); if (file->name != NULL) { free(old_name); memcpy(file->name, data, data_length); file->name[data_length] = '\0'; } else file->name = old_name; break; } /* FALLTHROUGH */ case 'P': if (p[0] == 'P' && p[1] == 'D' && version == 1) { /* * PD extension is padding; * contents are always ignored. */ break; } if (p[0] == 'P' && p[1] == 'X' && version == 1) { /* * PX extension comprises: * 8 bytes for mode, * 8 bytes for nlinks, * 8 bytes for uid, * 8 bytes for gid, * 8 bytes for inode. */ if (data_length == 32) { file->mode = toi(data, 4); file->nlinks = toi(data + 8, 4); file->uid = toi(data + 16, 4); file->gid = toi(data + 24, 4); file->inode = toi(data + 32, 4); } break; } /* FALLTHROUGH */ case 'R': if (p[0] == 'R' && p[1] == 'R' && version == 1) { iso9660->seenRockridge = 1; /* * RR extension comprises: * one byte flag value */ /* TODO: Handle RR extension. */ break; } /* FALLTHROUGH */ case 'S': if (p[0] == 'S' && p[1] == 'L' && version == 1 && *data == 0) { int cont = 1; /* SL extension with flags == 0 */ /* TODO: handle non-zero flag values. */ data++; /* Skip flag byte. */ data_length--; while (data_length > 0) { unsigned char flag = *data++; unsigned char nlen = *data++; data_length -= 2; if (cont == 0) archive_strcat(&file->symlink, "/"); cont = 0; switch(flag) { case 0x01: /* Continue */ archive_strncat(&file->symlink, data, nlen); cont = 1; break; case 0x02: /* Current */ archive_strcat(&file->symlink, "."); break; case 0x04: /* Parent */ archive_strcat(&file->symlink, ".."); break; case 0x08: /* Root */ case 0x10: /* Volume root */ archive_string_empty(&file->symlink); break; case 0x20: /* Hostname */ archive_strcat(&file->symlink, "hostname"); break; case 0: archive_strncat(&file->symlink, data, nlen); break; default: /* TODO: issue a warning ? 
*/ break; } data += nlen; data_length -= nlen; } break; } if (p[0] == 'S' && p[1] == 'P' && version == 1 && data_length == 7 && data[0] == (unsigned char)'\xbe' && data[1] == (unsigned char)'\xef') { /* * SP extension stores the suspOffset * (Number of bytes to skip between * filename and SUSP records.) * It is mandatory by the SUSP standard * (IEEE 1281). * * It allows SUSP to coexist with * non-SUSP uses of the System * Use Area by placing non-SUSP data * before SUSP data. * * TODO: Add a check for 'SP' in * first directory entry, disable all SUSP * processing if not found. */ iso9660->suspOffset = data[2]; break; } if (p[0] == 'S' && p[1] == 'T' && data_length == 0 && version == 1) { /* * ST extension marks end of this * block of SUSP entries. * * It allows SUSP to coexist with * non-SUSP uses of the System * Use Area by placing non-SUSP data * after SUSP data. */ return; } case 'T': if (p[0] == 'T' && p[1] == 'F' && version == 1) { char flag = data[0]; /* * TF extension comprises: * one byte flag * create time (optional) * modify time (optional) * access time (optional) * attribute time (optional) * Time format and presence of fields * is controlled by flag bits. */ data++; if (flag & 0x80) { /* Use 17-byte time format. */ if (flag & 1) /* Create time. */ data += 17; if (flag & 2) { /* Modify time. */ file->mtime = isodate17(data); data += 17; } if (flag & 4) { /* Access time. */ file->atime = isodate17(data); data += 17; } if (flag & 8) { /* Attribute time. */ file->ctime = isodate17(data); data += 17; } } else { /* Use 7-byte time format. */ if (flag & 1) /* Create time. */ data += 7; if (flag & 2) { /* Modify time. */ file->mtime = isodate7(data); data += 7; } if (flag & 4) { /* Access time. */ file->atime = isodate7(data); data += 7; } if (flag & 8) { /* Attribute time. */ file->ctime = isodate7(data); data += 7; } } break; } /* FALLTHROUGH */ default: /* The FALLTHROUGHs above leave us here for * any unsupported extension. 
*/ { const unsigned char *t; fprintf(stderr, "\nUnsupported RRIP extension for %s\n", file->name); fprintf(stderr, " %c%c(%d):", p[0], p[1], data_length); for (t = data; t < data + data_length && t < data + 16; t++) fprintf(stderr, " %02x", *t); fprintf(stderr, "\n"); } } p += p[2]; } } static void release_file(struct iso9660 *iso9660, struct file_info *file) { struct file_info *parent; if (file->refcount == 0) { parent = file->parent; if (file->name) free(file->name); archive_string_free(&file->symlink); free(file); if (parent != NULL) { parent->refcount--; release_file(iso9660, parent); } } } static int next_entry_seek(struct archive *a, struct iso9660 *iso9660, struct file_info **pfile) { struct file_info *file; uint64_t offset; *pfile = NULL; for (;;) { *pfile = file = next_entry(iso9660); if (file == NULL) return (ARCHIVE_EOF); /* CE area precedes actual file data? Ignore it. */ if (file->ce_offset > file->offset) { fprintf(stderr, " *** Discarding CE data.\n"); file->ce_offset = 0; file->ce_size = 0; } /* If CE exists, find and read it now. */ if (file->ce_offset > 0) offset = file->ce_offset; else offset = file->offset; /* Seek forward to the start of the entry. */ while (iso9660->current_position < offset) { ssize_t step = offset - iso9660->current_position; ssize_t bytes_read; const void *buff; if (step > iso9660->logical_block_size) step = iso9660->logical_block_size; bytes_read = (a->compression_read_ahead)(a, &buff, step); if (bytes_read <= 0) { release_file(iso9660, file); return (ARCHIVE_FATAL); } if (bytes_read > step) bytes_read = step; iso9660->current_position += bytes_read; (a->compression_read_consume)(a, bytes_read); } /* We found body of file; handle it now. */ if (offset == file->offset) return (ARCHIVE_OK); /* Found CE? Process it and push the file back onto list. 
*/ if (offset == file->ce_offset) { const void *p; ssize_t size = file->ce_size; ssize_t bytes_read; const unsigned char *rr_start; file->ce_offset = 0; file->ce_size = 0; bytes_read = (a->compression_read_ahead)(a, &p, size); if (bytes_read > size) bytes_read = size; rr_start = (const unsigned char *)p; parse_rockridge(iso9660, file, rr_start, rr_start + bytes_read); (a->compression_read_consume)(a, bytes_read); iso9660->current_position += bytes_read; add_entry(iso9660, file); } } } static struct file_info * next_entry(struct iso9660 *iso9660) { int least_index; uint64_t least_end_offset; int i; struct file_info *r; if (iso9660->pending_files_used < 1) return (NULL); /* Assume the first file in the list is the earliest on disk. */ least_index = 0; least_end_offset = iso9660->pending_files[0]->offset + iso9660->pending_files[0]->size; /* Now, try to find an earlier one. */ for(i = 0; i < iso9660->pending_files_used; i++) { /* Use the position of the file *end* as our comparison. */ uint64_t end_offset = iso9660->pending_files[i]->offset + iso9660->pending_files[i]->size; if (iso9660->pending_files[i]->ce_offset > 0 && iso9660->pending_files[i]->ce_offset < iso9660->pending_files[i]->offset) end_offset = iso9660->pending_files[i]->ce_offset + iso9660->pending_files[i]->ce_size; if (least_end_offset > end_offset) { least_index = i; least_end_offset = end_offset; } } r = iso9660->pending_files[least_index]; iso9660->pending_files[least_index] = iso9660->pending_files[--iso9660->pending_files_used]; return (r); } static int toi(const void *p, int n) { const unsigned char *v = (const unsigned char *)p; if (n > 1) return v[0] + 256 * toi(v + 1, n - 1); if (n == 1) return v[0]; return (0); } static time_t isodate7(const void *p) { struct tm tm; const unsigned char *v = (const unsigned char *)p; int offset; tm.tm_year = v[0]; tm.tm_mon = v[1] - 1; tm.tm_mday = v[2]; tm.tm_hour = v[3]; tm.tm_min = v[4]; tm.tm_sec = v[5]; /* v[6] is the timezone offset, in 1/4-hour 
increments. */ offset = ((const signed char *)p)[6]; if (offset > -48 && offset < 52) { tm.tm_hour -= offset / 4; tm.tm_min -= (offset % 4) * 15; } return (timegm(&tm)); } static time_t isodate17(const void *p) { struct tm tm; const unsigned char *v = (const unsigned char *)p; int offset; tm.tm_year = (v[0] - '0') * 1000 + (v[1] - '0') * 100 + (v[2] - '0') * 10 + (v[3] - '0') - 1900; tm.tm_mon = (v[4] - '0') * 10 + (v[5] - '0'); tm.tm_mday = (v[6] - '0') * 10 + (v[7] - '0'); tm.tm_hour = (v[8] - '0') * 10 + (v[9] - '0'); tm.tm_min = (v[10] - '0') * 10 + (v[11] - '0'); tm.tm_sec = (v[12] - '0') * 10 + (v[13] - '0'); /* v[16] is the timezone offset, in 1/4-hour increments. */ offset = ((const signed char *)p)[16]; if (offset > -48 && offset < 52) { tm.tm_hour -= offset / 4; tm.tm_min -= (offset % 4) * 15; } return (timegm(&tm)); } static const char * build_pathname(struct archive_string *as, struct file_info *file) { if (file->parent != NULL && file->parent->name[0] != '\0') { build_pathname(as, file->parent); archive_strcat(as, "/"); } if (file->name[0] == '\0') archive_strcat(as, "."); else archive_strcat(as, file->name); return (as->s); } static void dump_isodirrec(FILE *out, const struct iso9660_directory_record *isodirrec) { fprintf(out, " l %d,", isodirrec->length[0]); fprintf(out, " a %d,", isodirrec->ext_attr_length[0]); fprintf(out, " ext 0x%x,", toi(isodirrec->extent, 4)); fprintf(out, " s %d,", toi(isodirrec->size, 4)); fprintf(out, " f 0x%02x,", isodirrec->flags[0]); fprintf(out, " u %d,", isodirrec->file_unit_size[0]); fprintf(out, " ilv %d,", isodirrec->interleave[0]); fprintf(out, " seq %d,", toi(isodirrec->volume_sequence_number,2)); fprintf(out, " nl %d:", isodirrec->name_len[0]); fprintf(out, " `%.*s'", isodirrec->name_len[0], isodirrec->name); } Index: head/lib/libarchive/archive_read_support_format_tar.c =================================================================== --- head/lib/libarchive/archive_read_support_format_tar.c (revision 144702) 
+++ head/lib/libarchive/archive_read_support_format_tar.c (revision 144703) @@ -1,1645 +1,1646 @@ /*- * Copyright (c) 2003-2004 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #include #include /* #include */ /* See archive_platform.h */ #include #include #include #include #include "archive.h" #include "archive_entry.h" #include "archive_private.h" /* * Layout of POSIX 'ustar' tar header. 
*/
/* Every field is a fixed-width char array; the fields total 500 bytes. */
struct archive_entry_header_ustar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];	/* Starts at byte offset 148. */
	char	typeflag[1];
	char	linkname[100];	/* "old format" header ends here */
	char	magic[6];	/* For POSIX: "ustar\0" */
	char	version[2];	/* For POSIX: "00" */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	prefix[155];
};

/*
 * Structure of GNU tar header
 */
/* One sparse-map entry: two 12-byte numeric text fields. */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

/*
 * Identical to the ustar layout through 'linkname'; the 8-byte 'magic'
 * below occupies the space of the POSIX 'magic' + 'version' pair.
 */
struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];  /* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	struct gnu_sparse sparse[4];	/* First four sparse-map entries. */
	char	isextended[1];	/* Non-zero: more sparse records follow. */
	char	realsize[12];
	/*
	 * GNU doesn't use POSIX 'prefix' field; they use the 'L' (longname)
	 * entry instead.
	 */
};

/*
 * Data specific to this format.
*/
/* Node of a singly linked list describing one data region of a sparse file. */
struct sparse_block {
	struct sparse_block	*next;
	off_t	offset;		/* Position of this region within the entry. */
	off_t	remaining;	/* Bytes of this region not yet returned. */
};

/* Per-archive state of the tar reader. */
struct tar {
	struct archive_string	 acl_text;	/* Body of an 'A' header. */
	struct archive_string	 entry_name;
	struct archive_string	 entry_linkname;
	struct archive_string	 entry_uname;
	struct archive_string	 entry_gname;
	struct archive_string	 longlink;	/* Body of a 'K' header. */
	struct archive_string	 longname;	/* Body of an 'L' header. */
	struct archive_string	 pax_header;	/* Body of an 'x'/'X' header. */
	struct archive_string	 pax_global;	/* Body of a 'g' header. */
	wchar_t 		*pax_entry;	/* Scratch: one decoded pax line. */
	size_t			 pax_entry_length; /* Capacity, in wchar_t. */
	int			 header_recursion_depth;
	off_t			 entry_bytes_remaining;
	off_t			 entry_offset;
	off_t			 entry_padding;
	struct sparse_block	*sparse_list;
};

static size_t	UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n);
static int	archive_block_is_null(const unsigned char *p);
/*
 * NOTE(review): the two gnu_*_sparse_data functions below are the only
 * helpers declared without 'static'; presumably unintentional -- confirm
 * that nothing outside this file references them.
 */
int		gnu_read_sparse_data(struct archive *, struct tar *,
		    const struct archive_entry_header_gnutar *header);
void		gnu_parse_sparse_data(struct archive *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	header_Solaris_ACL(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *);
static int	header_common(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *);
static int	header_old_tar(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *);
static int	header_pax_extensions(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *);
static int	header_pax_global(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *h);
static int	header_longlink(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *h);
static int	header_longname(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *h);
static int	header_volume(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *h);
static int	header_ustar(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *h);
static int	header_gnutar(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, const void *h);
static int	archive_read_format_tar_bid(struct archive *);
static int	archive_read_format_tar_cleanup(struct archive *);
static int	archive_read_format_tar_read_data(struct archive *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_tar_read_header(struct archive *,
		    struct archive_entry *);
static int	checksum(struct archive *, const void *);
static int 	pax_attribute(struct archive_entry *, struct stat *,
		    wchar_t *key, wchar_t *value);
static int 	pax_header(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *, char *attr);
static void	pax_time(const wchar_t *, int64_t *sec, long *nanos);
static int	read_body_to_string(struct archive *, struct tar *,
		    struct archive_string *, const void *h);
static int64_t	tar_atol(const char *, unsigned);
static int64_t	tar_atol10(const wchar_t *, unsigned);
static int64_t	tar_atol256(const char *, unsigned);
static int64_t	tar_atol8(const char *, unsigned);
static int	tar_read_header(struct archive *, struct tar *,
		    struct archive_entry *, struct stat *);
static int	utf8_decode(wchar_t *, const char *, size_t length);

/*
 * ANSI C99 defines constants for these, but not everyone supports
 * those constants, so I define a couple of static variables here and
 * compute the values.  These calculations should be portable to any
 * 2s-complement architecture.
*/ #ifdef UINT64_MAX static const uint64_t max_uint64 = UINT64_MAX; #else static const uint64_t max_uint64 = ~(uint64_t)0; #endif #ifdef INT64_MAX static const int64_t max_int64 = INT64_MAX; #else static const int64_t max_int64 = (int64_t)((~(uint64_t)0) >> 1); #endif #ifdef INT64_MIN static const int64_t min_int64 = INT64_MIN; #else static const int64_t min_int64 = (int64_t)(~((~(uint64_t)0) >> 1)); #endif int archive_read_support_format_gnutar(struct archive *a) { return (archive_read_support_format_tar(a)); } int archive_read_support_format_tar(struct archive *a) { struct tar *tar; int r; tar = malloc(sizeof(*tar)); memset(tar, 0, sizeof(*tar)); r = __archive_read_register_format(a, tar, archive_read_format_tar_bid, archive_read_format_tar_read_header, archive_read_format_tar_read_data, + NULL, archive_read_format_tar_cleanup); if (r != ARCHIVE_OK) free(tar); return (ARCHIVE_OK); } static int archive_read_format_tar_cleanup(struct archive *a) { struct tar *tar; tar = *(a->pformat_data); archive_string_free(&tar->acl_text); archive_string_free(&tar->entry_name); archive_string_free(&tar->entry_linkname); archive_string_free(&tar->entry_uname); archive_string_free(&tar->entry_gname); archive_string_free(&tar->pax_global); archive_string_free(&tar->pax_header); if (tar->pax_entry != NULL) free(tar->pax_entry); free(tar); *(a->pformat_data) = NULL; return (ARCHIVE_OK); } static int archive_read_format_tar_bid(struct archive *a) { int bid; ssize_t bytes_read; const void *h; const struct archive_entry_header_ustar *header; /* * If we're already reading a non-tar file, don't * bother to bid. */ if (a->archive_format != 0 && (a->archive_format & ARCHIVE_FORMAT_BASE_MASK) != ARCHIVE_FORMAT_TAR) return (0); bid = 0; /* * If we're already reading a tar format, start the bid at 1 as * a failsafe. */ if ((a->archive_format & ARCHIVE_FORMAT_BASE_MASK) == ARCHIVE_FORMAT_TAR) bid++; /* Now let's look at the actual header and see if it matches. 
*/ if (a->compression_read_ahead != NULL) bytes_read = (a->compression_read_ahead)(a, &h, 512); else bytes_read = 0; /* Empty file. */ if (bytes_read < 0) return (ARCHIVE_FATAL); if (bytes_read == 0 && bid > 0) { /* An archive without a proper end-of-archive marker. */ /* Hold our nose and bid 1 anyway. */ return (1); } if (bytes_read < 512) { /* If it's a new archive, then just return a zero bid. */ if (bid == 0) return (0); /* * If we already know this is a tar archive, * then we have a problem. */ archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated tar archive"); return (ARCHIVE_FATAL); } /* If it's an end-of-archive mark, we can handle it. */ if ((*(const char *)h) == 0 && archive_block_is_null(h)) { /* If it's a known tar file, end-of-archive is definite. */ if ((a->archive_format & ARCHIVE_FORMAT_BASE_MASK) == ARCHIVE_FORMAT_TAR) return (512); /* Empty archive? */ return (1); } /* If it's not an end-of-archive mark, it must have a valid checksum.*/ if (!checksum(a, h)) return (0); bid += 48; /* Checksum is usually 6 octal digits. */ header = h; /* Recognize POSIX formats. */ if ((memcmp(header->magic, "ustar\0", 6) == 0) &&(memcmp(header->version, "00", 2)==0)) bid += 56; /* Recognize GNU tar format. */ if ((memcmp(header->magic, "ustar ", 6) == 0) &&(memcmp(header->version, " \0", 2)==0)) bid += 56; /* Type flag must be null, digit or A-Z, a-z. */ if (header->typeflag[0] != 0 && !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) return (0); bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ /* Sanity check: Look at first byte of mode field. */ switch (255 & (unsigned)header->mode[0]) { case 0: case 255: /* Base-256 value: No further verification possible! */ break; case ' ': /* Not recommended, but not illegal, either. 
*/ break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* Octal Value. */ /* TODO: Check format of remainder of this field. */ break; default: /* Not a valid mode; bail out here. */ return (0); } /* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */ return (bid); } /* * The function invoked by archive_read_header(). This * just sets up a few things and then calls the internal * tar_read_header() function below. */ static int archive_read_format_tar_read_header(struct archive *a, struct archive_entry *entry) { struct stat st; struct tar *tar; const char *p; int r; size_t l; memset(&st, 0, sizeof(st)); tar = *(a->pformat_data); tar->entry_offset = 0; r = tar_read_header(a, tar, entry, &st); if (r == ARCHIVE_OK) { /* * "Regular" entry with trailing '/' is really * directory: This is needed for certain old tar * variants and even for some broken newer ones. */ p = archive_entry_pathname(entry); l = strlen(p); if (S_ISREG(st.st_mode) && p[l-1] == '/') { st.st_mode &= ~S_IFMT; st.st_mode |= S_IFDIR; } /* Copy the final stat data into the entry. 
*/ archive_entry_copy_stat(entry, &st); } return (r); } static int archive_read_format_tar_read_data(struct archive *a, const void **buff, size_t *size, off_t *offset) { ssize_t bytes_read; struct tar *tar; struct sparse_block *p; tar = *(a->pformat_data); if (tar->entry_bytes_remaining > 0) { bytes_read = (a->compression_read_ahead)(a, buff, 1); if (bytes_read <= 0) return (ARCHIVE_FATAL); if (bytes_read > tar->entry_bytes_remaining) bytes_read = tar->entry_bytes_remaining; while (tar->sparse_list != NULL && tar->sparse_list->remaining == 0) { p = tar->sparse_list; tar->sparse_list = p->next; free(p); if (tar->sparse_list != NULL) tar->entry_offset = tar->sparse_list->offset; } if (tar->sparse_list != NULL) { if (tar->sparse_list->remaining < bytes_read) bytes_read = tar->sparse_list->remaining; tar->sparse_list->remaining -= bytes_read; } *size = bytes_read; *offset = tar->entry_offset; tar->entry_offset += bytes_read; tar->entry_bytes_remaining -= bytes_read; (a->compression_read_consume)(a, bytes_read); return (ARCHIVE_OK); } else { while (tar->entry_padding > 0) { bytes_read = (a->compression_read_ahead)(a, buff, 1); if (bytes_read <= 0) return (ARCHIVE_FATAL); if (bytes_read > tar->entry_padding) bytes_read = tar->entry_padding; (a->compression_read_consume)(a, bytes_read); tar->entry_padding -= bytes_read; } *buff = NULL; *size = 0; *offset = tar->entry_offset; return (ARCHIVE_EOF); } } /* * This function recursively interprets all of the headers associated * with a single entry. */ static int tar_read_header(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st) { ssize_t bytes; int err; const void *h; const struct archive_entry_header_ustar *header; /* Read 512-byte header record */ bytes = (a->compression_read_ahead)(a, &h, 512); if (bytes < 512) { /* * If we're here, it's becase the _bid function accepted * this file. So just call a short read end-of-archive * and be done with it. 
*/ return (ARCHIVE_EOF); } (a->compression_read_consume)(a, 512); /* Check for end-of-archive mark. */ if (((*(const char *)h)==0) && archive_block_is_null(h)) { /* Try to consume a second all-null record, as well. */ bytes = (a->compression_read_ahead)(a, &h, 512); if (bytes > 0) (a->compression_read_consume)(a, bytes); archive_set_error(a, 0, NULL); return (ARCHIVE_EOF); } /* * Note: If the checksum fails and we return ARCHIVE_RETRY, * then the client is likely to just retry. This is a very * crude way to search for the next valid header! * * TODO: Improve this by implementing a real header scan. */ if (!checksum(a, h)) { archive_set_error(a, EINVAL, "Damaged tar archive"); return (ARCHIVE_RETRY); /* Retryable: Invalid header */ } if (++tar->header_recursion_depth > 32) { archive_set_error(a, EINVAL, "Too many special headers"); return (ARCHIVE_WARN); } /* Determine the format variant. */ header = h; switch(header->typeflag[0]) { case 'A': /* Solaris tar ACL */ a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive_format_name = "Solaris tar"; err = header_Solaris_ACL(a, tar, entry, st, h); break; case 'g': /* POSIX-standard 'g' header. */ a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive_format_name = "POSIX pax interchange format"; err = header_pax_global(a, tar, entry, st, h); break; case 'K': /* Long link name (GNU tar, others) */ err = header_longlink(a, tar, entry, st, h); break; case 'L': /* Long filename (GNU tar, others) */ err = header_longname(a, tar, entry, st, h); break; case 'V': /* GNU volume header */ err = header_volume(a, tar, entry, st, h); break; case 'X': /* Used by SUN tar; same as 'x'. */ a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive_format_name = "POSIX pax interchange format (Sun variant)"; err = header_pax_extensions(a, tar, entry, st, h); break; case 'x': /* POSIX-standard 'x' header. 
*/ a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive_format_name = "POSIX pax interchange format"; err = header_pax_extensions(a, tar, entry, st, h); break; default: if (memcmp(header->magic, "ustar \0", 8) == 0) { a->archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; a->archive_format_name = "GNU tar format"; err = header_gnutar(a, tar, entry, st, h); } else if (memcmp(header->magic, "ustar", 5) == 0) { if (a->archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { a->archive_format = ARCHIVE_FORMAT_TAR_USTAR; a->archive_format_name = "POSIX ustar format"; } err = header_ustar(a, tar, entry, st, h); } else { a->archive_format = ARCHIVE_FORMAT_TAR; a->archive_format_name = "tar (non-POSIX)"; err = header_old_tar(a, tar, entry, st, h); } } --tar->header_recursion_depth; return (err); } /* * Return true if block checksum is correct. */ static int checksum(struct archive *a, const void *h) { const unsigned char *bytes; const struct archive_entry_header_ustar *header; int check, i, sum; (void)a; /* UNUSED */ bytes = h; header = h; /* * Test the checksum. Note that POSIX specifies _unsigned_ * bytes for this calculation. */ sum = tar_atol(header->checksum, sizeof(header->checksum)); check = 0; for (i = 0; i < 148; i++) check += (unsigned char)bytes[i]; for (; i < 156; i++) check += 32; for (; i < 512; i++) check += (unsigned char)bytes[i]; if (sum == check) return (1); /* * Repeat test with _signed_ bytes, just in case this archive * was created by an old BSD, Solaris, or HP-UX tar with a * broken checksum calculation. */ check = 0; for (i = 0; i < 148; i++) check += (signed char)bytes[i]; for (; i < 156; i++) check += 32; for (; i < 512; i++) check += (signed char)bytes[i]; if (sum == check) return (1); return (0); } /* * Return true if this block contains only nulls. 
*/ static int archive_block_is_null(const unsigned char *p) { unsigned i; for (i = 0; i < ARCHIVE_BYTES_PER_RECORD / sizeof(*p); i++) if (*p++) return (0); return (1); } /* * Interpret 'A' Solaris ACL header */ static int header_Solaris_ACL(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { int err, err2; char *p; wchar_t *wp; err = read_body_to_string(a, tar, &(tar->acl_text), h); err2 = tar_read_header(a, tar, entry, st); err = err_combine(err, err2); /* XXX Ensure p doesn't overrun acl_text */ /* Skip leading octal number. */ /* XXX TODO: Parse the octal number and sanity-check it. */ p = tar->acl_text.s; while (*p != '\0') p++; p++; wp = malloc((strlen(p) + 1) * sizeof(wchar_t)); if (wp != NULL) { utf8_decode(wp, p, strlen(p)); err2 = __archive_entry_acl_parse_w(entry, wp, ARCHIVE_ENTRY_ACL_TYPE_ACCESS); err = err_combine(err, err2); free(wp); } return (err); } /* * Interpret 'K' long linkname header. */ static int header_longlink(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { int err, err2; err = read_body_to_string(a, tar, &(tar->longlink), h); err2 = tar_read_header(a, tar, entry, st); if (err == ARCHIVE_OK && err2 == ARCHIVE_OK) { /* Set symlink if symlink already set, else hardlink. */ archive_entry_set_link(entry, tar->longlink.s); } return (err_combine(err, err2)); } /* * Interpret 'L' long filename header. */ static int header_longname(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { int err, err2; err = read_body_to_string(a, tar, &(tar->longname), h); /* Read and parse "real" header, then override name. */ err2 = tar_read_header(a, tar, entry, st); if (err == ARCHIVE_OK && err2 == ARCHIVE_OK) archive_entry_set_pathname(entry, tar->longname.s); return (err_combine(err, err2)); } /* * Interpret 'V' GNU tar volume header. 
*/ static int header_volume(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { (void)h; /* Just skip this and read the next header. */ return (tar_read_header(a, tar, entry, st)); } /* * Read body of an archive entry into an archive_string object. */ static int read_body_to_string(struct archive *a, struct tar *tar, struct archive_string *as, const void *h) { off_t size, padded_size; ssize_t bytes_read, bytes_to_copy; const struct archive_entry_header_ustar *header; const void *src; char *dest; (void)tar; /* UNUSED */ header = h; size = tar_atol(header->size, sizeof(header->size)); /* Read the body into the string. */ archive_string_ensure(as, size+1); padded_size = (size + 511) & ~ 511; dest = as->s; while (padded_size > 0) { bytes_read = (a->compression_read_ahead)(a, &src, padded_size); if (bytes_read < 0) return (ARCHIVE_FATAL); if (bytes_read > padded_size) bytes_read = padded_size; (a->compression_read_consume)(a, bytes_read); bytes_to_copy = bytes_read; if ((off_t)bytes_to_copy > size) bytes_to_copy = (ssize_t)size; memcpy(dest, src, bytes_to_copy); dest += bytes_to_copy; size -= bytes_to_copy; padded_size -= bytes_read; } *dest = '\0'; return (ARCHIVE_OK); } /* * Parse out common header elements. * * This would be the same as header_old_tar, except that the * filename is handled slightly differently for old and POSIX * entries (POSIX entries support a 'prefix'). This factoring * allows header_old_tar and header_ustar * to handle filenames differently, while still putting most of the * common parsing into one place. 
*/
static int
header_common(struct archive *a, struct tar *tar,
    struct archive_entry *entry, struct stat *st, const void *h)
{
	const struct archive_entry_header_ustar *hdr = h;
	mode_t file_type = 0;	/* S_IF* bits to merge into st_mode. */
	int drop_body = 0;	/* Non-zero: force st_size to zero. */

	/* Keep a NUL-terminated copy of the link target, if any. */
	if (hdr->linkname[0] == '\0')
		archive_string_empty(&(tar->entry_linkname));
	else
		archive_strncpy(&(tar->entry_linkname), hdr->linkname,
		    sizeof(hdr->linkname));

	/* The numeric fields are all decoded by tar_atol(). */
	st->st_mode  = tar_atol(hdr->mode, sizeof(hdr->mode));
	st->st_uid   = tar_atol(hdr->uid, sizeof(hdr->uid));
	st->st_gid   = tar_atol(hdr->gid, sizeof(hdr->gid));
	st->st_size  = tar_atol(hdr->size, sizeof(hdr->size));
	st->st_mtime = tar_atol(hdr->mtime, sizeof(hdr->mtime));

	/* The file type comes from the type flag, not the mode field. */
	st->st_mode &= ~S_IFMT;

	switch (hdr->typeflag[0]) {
	case '1': /* Hard link */
		archive_entry_set_hardlink(entry, tar->entry_linkname.s);
		/*
		 * tar records only "this is a hard link", not the
		 * type of the target, so the type normally stays
		 * zero.  Pax interchange format lets a hard link
		 * carry data, which implies a regular file.
		 */
		if (st->st_size > 0) {
			file_type = S_IFREG;
			/*
			 * Older writers sometimes store a non-zero
			 * size with an empty body, while pax (but not
			 * ustar) permits a real body.  Pax archives
			 * need not contain 'x' or 'g' entries, so the
			 * two cannot always be told apart.  Unless the
			 * archive is already known to be pax, ask the
			 * bid function whether a valid header follows
			 * immediately; if so, treat the body as empty.
			 *
			 * This guess is wrong only for a pax archive
			 * with no extended headers seen so far that
			 * stores a hard-link body which is itself an
			 * uncompressed tar archive.
			 */
			if (a->archive_format
			    != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE &&
			    archive_read_format_tar_bid(a) > 50)
				drop_body = 1;
		}
		break;
	case '2': /* Symlink */
		file_type = S_IFLNK;
		drop_body = 1;
		archive_entry_set_symlink(entry, tar->entry_linkname.s);
		break;
	case '3': /* Character device */
		file_type = S_IFCHR;
		drop_body = 1;
		break;
	case '4': /* Block device */
		file_type = S_IFBLK;
		drop_body = 1;
		break;
	case '5': /* Dir */
		file_type = S_IFDIR;
		drop_body = 1;
		break;
	case '6': /* FIFO device */
		file_type = S_IFIFO;
		drop_body = 1;
		break;
	case 'D': /* GNU incremental directory type */
		/*
		 * Nothing special is needed; the body (a file list)
		 * is left for the client.  Preprocessing that list
		 * would be a possible future nicety.
		 */
		file_type = S_IFDIR;
		break;
	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
		/*
		 * Apparently a regular-file entry whose contents are
		 * to be appended to the named file at the recorded
		 * offset.  Full support would need API work, so no
		 * type bits are set here.
		 */
		break;
	case 'N': /* Old GNU "long filename" entry. */
		/*
		 * The body is a script for renaming entries that were
		 * already extracted.  libarchive will never support
		 * it; expose it as a plain file.
		 */
		file_type = S_IFREG;
		break;
	case 'S': /* GNU sparse files */
		/*
		 * A sparse file is a regular file whose map lives in
		 * the extended header area.
		 */
		/* FALL THROUGH */
	default: /* Regular file  and non-standard types */
		/*
		 * POSIX: unrecognized types are to be handled as
		 * regular files.
		 */
		file_type = S_IFREG;
		break;
	}

	st->st_mode |= file_type;
	if (drop_body)
		st->st_size = 0;
	return (0);
}

/*
 * Parse out header elements for "old-style" tar archives.
*/ static int header_old_tar(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { const struct archive_entry_header_ustar *header; /* Copy filename over (to ensure null termination). */ header = h; archive_strncpy(&(tar->entry_name), header->name, sizeof(header->name)); archive_entry_set_pathname(entry, tar->entry_name.s); /* Grab rest of common fields */ header_common(a, tar, entry, st, h); tar->entry_bytes_remaining = st->st_size; tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (0); } /* * Parse a file header for a pax extended archive entry. */ static int header_pax_global(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { int err, err2; err = read_body_to_string(a, tar, &(tar->pax_global), h); err2 = tar_read_header(a, tar, entry, st); return (err_combine(err, err2)); } static int header_pax_extensions(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { int err, err2; read_body_to_string(a, tar, &(tar->pax_header), h); /* Parse the next header. */ err = tar_read_header(a, tar, entry, st); /* * TODO: Parse global/default options into 'entry' struct here * before handling file-specific options. * * This design (parse standard header, then overwrite with pax * extended attribute data) usually works well, but isn't ideal; * it would be better to parse the pax extended attributes first * and then skip any fields in the standard header that were * defined in the pax header. */ err2 = pax_header(a, tar, entry, st, tar->pax_header.s); err = err_combine(err, err2); tar->entry_bytes_remaining = st->st_size; tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Parse a file header for a Posix "ustar" archive entry. This also * handles "pax" or "extended ustar" entries. 
*/ static int header_ustar(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { const struct archive_entry_header_ustar *header; struct archive_string *as; header = h; /* Copy name into an internal buffer to ensure null-termination. */ as = &(tar->entry_name); if (header->prefix[0]) { archive_strncpy(as, header->prefix, sizeof(header->prefix)); if (as->s[archive_strlen(as) - 1] != '/') archive_strappend_char(as, '/'); archive_strncat(as, header->name, sizeof(header->name)); } else archive_strncpy(as, header->name, sizeof(header->name)); archive_entry_set_pathname(entry, as->s); /* Handle rest of common fields. */ header_common(a, tar, entry, st, h); /* Handle POSIX ustar fields. */ archive_strncpy(&(tar->entry_uname), header->uname, sizeof(header->uname)); archive_entry_set_uname(entry, tar->entry_uname.s); archive_strncpy(&(tar->entry_gname), header->gname, sizeof(header->gname)); archive_entry_set_gname(entry, tar->entry_gname.s); /* Parse out device numbers only for char and block specials. */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { st->st_rdev = makedev( tar_atol(header->rdevmajor, sizeof(header->rdevmajor)), tar_atol(header->rdevminor, sizeof(header->rdevminor))); } tar->entry_bytes_remaining = st->st_size; tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (0); } /* * Parse the pax extended attributes record. * * Returns non-zero if there's an error in the data. */ static int pax_header(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, char *attr) { size_t attr_length, l, line_length; char *line, *p; wchar_t *key, *wp, *value; int err, err2; attr_length = strlen(attr); err = ARCHIVE_OK; while (attr_length > 0) { /* Parse decimal length field at start of line. */ line_length = 0; l = attr_length; line = p = attr; /* Record start of line. 
*/ while (l>0) { if (*p == ' ') { p++; l--; break; } if (*p < '0' || *p > '9') return (-1); line_length *= 10; line_length += *p - '0'; if (line_length > 999999) { archive_set_error(a, ARCHIVE_ERRNO_MISC, "Rejecting pax extended attribute > 1MB"); return (ARCHIVE_WARN); } p++; l--; } if (line_length > attr_length) return (0); /* Ensure pax_entry buffer is big enough. */ if (tar->pax_entry_length <= line_length) { if (tar->pax_entry_length <= 0) tar->pax_entry_length = 1024; while (tar->pax_entry_length <= line_length + 1) tar->pax_entry_length *= 2; /* XXX Error handling here */ tar->pax_entry = realloc(tar->pax_entry, tar->pax_entry_length * sizeof(wchar_t)); } /* Decode UTF-8 to wchar_t, null-terminate result. */ if (utf8_decode(tar->pax_entry, p, line_length - (p - attr) - 1)) { archive_set_error(a, ARCHIVE_ERRNO_MISC, "Invalid UTF8 character in pax extended attribute"); err = err_combine(err, ARCHIVE_WARN); } /* Null-terminate 'key' value. */ key = tar->pax_entry; if (key[0] == L'=') return (-1); wp = wcschr(key, L'='); if (wp == NULL) { archive_set_error(a, ARCHIVE_ERRNO_MISC, "Invalid pax extended attributes"); return (ARCHIVE_WARN); } *wp = 0; /* Identify null-terminated 'value' portion. */ value = wp + 1; /* Identify this attribute and set it in the entry. */ err2 = pax_attribute(entry, st, key, value); err = err_combine(err, err2); /* Skip to next line */ attr += line_length; attr_length -= line_length; } return (err); } /* * Parse a single key=value attribute. key/value pointers are * assumed to point into reasonably long-lived storage. * * Note that POSIX reserves all-lowercase keywords. Vendor-specific * extensions should always have keywords of the form "VENDOR.attribute" * In particular, it's quite feasible to support many different * vendor extensions here. I'm using "LIBARCHIVE" for extensions * unique to this library (currently, there are none). * * Investigate other vendor-specific extensions, as well and see if * any of them look useful. 
*/ static int pax_attribute(struct archive_entry *entry, struct stat *st, wchar_t *key, wchar_t *value) { int64_t s; long n; switch (key[0]) { case 'L': /* Our extensions */ /* TODO: Handle arbitrary extended attributes... */ /* if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0) archive_entry_set_xxxxxx(entry, value); */ break; case 'S': /* We support some keys used by the "star" archiver */ if (wcscmp(key, L"SCHILY.acl.access")==0) __archive_entry_acl_parse_w(entry, value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS); else if (wcscmp(key, L"SCHILY.acl.default")==0) __archive_entry_acl_parse_w(entry, value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); else if (wcscmp(key, L"SCHILY.devmajor")==0) st->st_rdev = makedev(tar_atol10(value, wcslen(value)), minor(st->st_rdev)); else if (wcscmp(key, L"SCHILY.devminor")==0) st->st_rdev = makedev(major(st->st_rdev), tar_atol10(value, wcslen(value))); else if (wcscmp(key, L"SCHILY.fflags")==0) archive_entry_copy_fflags_text_w(entry, value); else if (wcscmp(key, L"SCHILY.nlink")==0) st->st_nlink = tar_atol10(value, wcslen(value)); break; case 'a': if (wcscmp(key, L"atime")==0) { pax_time(value, &s, &n); st->st_atime = s; ARCHIVE_STAT_SET_ATIME_NANOS(st, n); } break; case 'c': if (wcscmp(key, L"ctime")==0) { pax_time(value, &s, &n); st->st_ctime = s; ARCHIVE_STAT_SET_CTIME_NANOS(st, n); } else if (wcscmp(key, L"charset")==0) { /* TODO: Publish charset information in entry. */ } else if (wcscmp(key, L"comment")==0) { /* TODO: Publish comment in entry. */ } break; case 'g': if (wcscmp(key, L"gid")==0) st->st_gid = tar_atol10(value, wcslen(value)); else if (wcscmp(key, L"gname")==0) archive_entry_copy_gname_w(entry, value); break; case 'l': /* pax interchange doesn't distinguish hardlink vs. symlink. 
*/ if (wcscmp(key, L"linkpath")==0) { if (archive_entry_hardlink(entry)) archive_entry_copy_hardlink_w(entry, value); else archive_entry_copy_symlink_w(entry, value); } break; case 'm': if (wcscmp(key, L"mtime")==0) { pax_time(value, &s, &n); st->st_mtime = s; ARCHIVE_STAT_SET_MTIME_NANOS(st, n); } break; case 'p': if (wcscmp(key, L"path")==0) archive_entry_copy_pathname_w(entry, value); break; case 'r': /* POSIX has reserved 'realtime.*' */ break; case 's': /* POSIX has reserved 'security.*' */ /* Someday: if (wcscmp(key, L"security.acl")==0) { ... } */ if (wcscmp(key, L"size")==0) st->st_size = tar_atol10(value, wcslen(value)); break; case 'u': if (wcscmp(key, L"uid")==0) st->st_uid = tar_atol10(value, wcslen(value)); else if (wcscmp(key, L"uname")==0) archive_entry_copy_uname_w(entry, value); break; } return (0); } /* * parse a decimal time value, which may include a fractional portion */ static void pax_time(const wchar_t *p, int64_t *ps, long *pn) { char digit; int64_t s; unsigned long l; int sign; int64_t limit, last_digit_limit; limit = max_int64 / 10; last_digit_limit = max_int64 % 10; s = 0; sign = 1; if (*p == '-') { sign = -1; p++; } while (*p >= '0' && *p <= '9') { digit = *p - '0'; if (s > limit || (s == limit && digit > last_digit_limit)) { s = max_uint64; break; } s = (s * 10) + digit; ++p; } *ps = s * sign; /* Calculate nanoseconds. */ *pn = 0; if (*p != '.') return; l = 100000000UL; do { ++p; if (*p >= '0' && *p <= '9') *pn += (*p - '0') * l; else break; } while (l /= 10); } /* * Parse GNU tar header */ static int header_gnutar(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { const struct archive_entry_header_gnutar *header; (void)a; /* * GNU header is like POSIX ustar, except 'prefix' is * replaced with some other fields. This also means the * filename is stored as in old-style archives. */ /* Grab fields common to all tar variants. 
*/ header_common(a, tar, entry, st, h); /* Copy filename over (to ensure null termination). */ header = h; archive_strncpy(&(tar->entry_name), header->name, sizeof(header->name)); archive_entry_set_pathname(entry, tar->entry_name.s); /* Fields common to ustar and GNU */ /* XXX Can the following be factored out since it's common * to ustar and gnu tar? Is it okay to move it down into * header_common, perhaps? */ archive_strncpy(&(tar->entry_uname), header->uname, sizeof(header->uname)); archive_entry_set_uname(entry, tar->entry_uname.s); archive_strncpy(&(tar->entry_gname), header->gname, sizeof(header->gname)); archive_entry_set_gname(entry, tar->entry_gname.s); /* Parse out device numbers only for char and block specials */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') st->st_rdev = makedev ( tar_atol(header->rdevmajor, sizeof(header->rdevmajor)), tar_atol(header->rdevminor, sizeof(header->rdevminor))); else st->st_rdev = 0; tar->entry_bytes_remaining = st->st_size; tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); /* Grab GNU-specific fields. */ st->st_atime = tar_atol(header->atime, sizeof(header->atime)); st->st_ctime = tar_atol(header->ctime, sizeof(header->ctime)); if (header->realsize[0] != 0) { st->st_size = tar_atol(header->realsize, sizeof(header->realsize)); } if (header->sparse[0].offset[0] != 0) { gnu_read_sparse_data(a, tar, header); } else { if (header->isextended[0] != 0) { /* XXX WTF? 
XXX */ } } return (0); } int gnu_read_sparse_data(struct archive *a, struct tar *tar, const struct archive_entry_header_gnutar *header) { ssize_t bytes_read; const void *data; struct extended { struct gnu_sparse sparse[21]; char isextended[1]; char padding[7]; }; const struct extended *ext; gnu_parse_sparse_data(a, tar, header->sparse, 4); if (header->isextended[0] == 0) return (ARCHIVE_OK); do { bytes_read = (a->compression_read_ahead)(a, &data, 512); if (bytes_read < 0) return (ARCHIVE_FATAL); if (bytes_read < 512) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated tar archive " "detected while reading sparse file data"); return (ARCHIVE_FATAL); } (a->compression_read_consume)(a, 512); ext = (const struct extended *)data; gnu_parse_sparse_data(a, tar, ext->sparse, 21); } while (ext->isextended[0] != 0); if (tar->sparse_list != NULL) tar->entry_offset = tar->sparse_list->offset; return (ARCHIVE_OK); } void gnu_parse_sparse_data(struct archive *a, struct tar *tar, const struct gnu_sparse *sparse, int length) { struct sparse_block *last; struct sparse_block *p; (void)a; /* UNUSED */ last = tar->sparse_list; while (last != NULL && last->next != NULL) last = last->next; while (length > 0 && sparse->offset[0] != 0) { p = malloc(sizeof(*p)); memset(p, 0, sizeof(*p)); if (last != NULL) last->next = p; else tar->sparse_list = p; last = p; p->offset = tar_atol(sparse->offset, sizeof(sparse->offset)); p->remaining = tar_atol(sparse->numbytes, sizeof(sparse->numbytes)); sparse++; length--; } } /*- * Convert text->integer. * * Traditional tar formats (including POSIX) specify base-8 for * all of the standard numeric fields. This is a significant limitation * in practice: * = file size is limited to 8GB * = rdevmajor and rdevminor are limited to 21 bits * = uid/gid are limited to 21 bits * * There are two workarounds for this: * = pax extended headers, which use variable-length string fields * = GNU tar and STAR both allow either base-8 or base-256 in * most fields. 
The high bit is set to indicate base-256. * * On read, this implementation supports both extensions. */ static int64_t tar_atol(const char *p, unsigned char_cnt) { /* * Technically, GNU tar considers a field to be in base-256 * only if the first byte is 0xff or 0x80. */ if (*p & 0x80) return (tar_atol256(p, char_cnt)); return (tar_atol8(p, char_cnt)); } /* * Note that this implementation does not (and should not!) obey * locale settings; you cannot simply substitute strtol here, since * it does obey locale. */ static int64_t tar_atol8(const char *p, unsigned char_cnt) { int64_t l, limit, last_digit_limit; int digit, sign, base; base = 8; limit = max_int64 / base; last_digit_limit = max_int64 % base; while (*p == ' ' || *p == '\t') p++; if (*p == '-') { sign = -1; p++; } else sign = 1; l = 0; digit = *p - '0'; while (digit >= 0 && digit < base && char_cnt-- > 0) { if (l>limit || (l == limit && digit > last_digit_limit)) { l = max_uint64; /* Truncate on overflow. */ break; } l = (l * base) + digit; digit = *++p - '0'; } return (sign < 0) ? -l : l; } /* * Note that this implementation does not (and should not!) obey * locale settings; you cannot simply substitute strtol here, since * it does obey locale. */ static int64_t tar_atol10(const wchar_t *p, unsigned char_cnt) { int64_t l, limit, last_digit_limit; int base, digit, sign; base = 10; limit = max_int64 / base; last_digit_limit = max_int64 % base; while (*p == ' ' || *p == '\t') p++; if (*p == '-') { sign = -1; p++; } else sign = 1; l = 0; digit = *p - '0'; while (digit >= 0 && digit < base && char_cnt-- > 0) { if (l > limit || (l == limit && digit > last_digit_limit)) { l = max_uint64; /* Truncate on overflow. */ break; } l = (l * base) + digit; digit = *++p - '0'; } return (sign < 0) ? -l : l; } /* * Parse a base-256 integer. This is just a straight signed binary * value in big-endian order, except that the high-order bit is * ignored. 
Remember that "int64_t" may or may not be exactly 64 * bits; the implementation here tries to avoid making any assumptions * about the actual size of an int64_t. It does assume we're using * twos-complement arithmetic, though. */ static int64_t tar_atol256(const char *_p, unsigned char_cnt) { int64_t l, upper_limit, lower_limit; const unsigned char *p = _p; upper_limit = max_int64 / 256; lower_limit = min_int64 / 256; /* Pad with 1 or 0 bits, depending on sign. */ if ((0x40 & *p) == 0x40) l = (int64_t)-1; else l = 0; l = (l << 6) | (0x3f & *p++); while (--char_cnt > 0) { if (l > upper_limit) { l = max_int64; /* Truncate on overflow */ break; } else if (l < lower_limit) { l = min_int64; break; } l = (l << 8) | (0xff & (int64_t)*p++); } return (l); } static int utf8_decode(wchar_t *dest, const char *src, size_t length) { size_t n; int err; err = 0; while(length > 0) { n = UTF8_mbrtowc(dest, src, length); if (n == 0) break; if (n > 8) { /* Invalid byte encountered; try to keep going. */ *dest = L'?'; n = 1; err = 1; } dest++; src += n; length -= n; } *dest++ = L'\0'; return (err); } /* * Copied from FreeBSD libc/locale. */ static size_t UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n) { int ch, i, len, mask; unsigned long lbound, wch; if (s == NULL) /* Reset to initial shift state (no-op) */ return (0); if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); /* * Determine the number of octets that make up this character from * the first octet, and a mask that extracts the interesting bits of * the first octet. * * We also specify a lower bound for the character code to detect * redundant, non-"shortest form" encodings. For example, the * sequence C0 80 is _not_ a legal representation of the null * character. This enforces a 1-to-1 mapping between character * codes and their multibyte representations. 
*/ ch = (unsigned char)*s; if ((ch & 0x80) == 0) { mask = 0x7f; len = 1; lbound = 0; } else if ((ch & 0xe0) == 0xc0) { mask = 0x1f; len = 2; lbound = 0x80; } else if ((ch & 0xf0) == 0xe0) { mask = 0x0f; len = 3; lbound = 0x800; } else if ((ch & 0xf8) == 0xf0) { mask = 0x07; len = 4; lbound = 0x10000; } else if ((ch & 0xfc) == 0xf8) { mask = 0x03; len = 5; lbound = 0x200000; } else if ((ch & 0xfc) == 0xfc) { mask = 0x01; len = 6; lbound = 0x4000000; } else { /* * Malformed input; input is not UTF-8. */ errno = EILSEQ; return ((size_t)-1); } if (n < (size_t)len) /* Incomplete multibyte sequence */ return ((size_t)-2); /* * Decode the octet sequence representing the character in chunks * of 6 bits, most significant first. */ wch = (unsigned char)*s++ & mask; i = len; while (--i != 0) { if ((*s & 0xc0) != 0x80) { /* * Malformed input; bad characters in the middle * of a character. */ errno = EILSEQ; return ((size_t)-1); } wch <<= 6; wch |= *s++ & 0x3f; } if (wch < lbound) { /* * Malformed input; redundant encoding. */ errno = EILSEQ; return ((size_t)-1); } if (pwc != NULL) { /* Assign the value to the output; out-of-range values * just get truncated. */ *pwc = (wchar_t)wch; #ifdef WCHAR_MAX /* * If platform has WCHAR_MAX, we can do something * more sensible with out-of-range values. */ if (wch >= WCHAR_MAX) *pwc = '?'; #endif } return (wch == L'\0' ? 0 : len); } Index: head/lib/libarchive/archive_read_support_format_zip.c =================================================================== --- head/lib/libarchive/archive_read_support_format_zip.c (revision 144702) +++ head/lib/libarchive/archive_read_support_format_zip.c (revision 144703) @@ -1,535 +1,793 @@ /*- * Copyright (c) 2004 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef HAVE_ZLIB_H #include #endif #include "archive.h" #include "archive_entry.h" #include "archive_private.h" struct zip { + /* entry_bytes_remaining is the number of bytes we expect. */ off_t entry_bytes_remaining; off_t entry_offset; + /* These count the number of bytes actually read for the entry. */ + off_t entry_compressed_bytes_read; + off_t entry_uncompressed_bytes_read; + unsigned version; unsigned system; unsigned flags; unsigned compression; const char * compression_name; time_t mtime; + time_t ctime; + time_t atime; + mode_t mode; + uid_t uid; + gid_t gid; + + /* Flags to mark progress of decompression. 
*/ + char decompress_init; char end_of_entry; + char end_of_entry_cleanup; long crc32; ssize_t filename_length; ssize_t extra_length; off_t uncompressed_size; off_t compressed_size; unsigned char *uncompressed_buffer; size_t uncompressed_buffer_size; #ifdef HAVE_ZLIB_H z_stream stream; #endif struct archive_string pathname; struct archive_string extra; char format_name[64]; }; #define ZIP_LENGTH_AT_END 8 struct zip_file_header { char signature[4]; - char version[1]; - char reserved[1]; + char version[2]; char flags[2]; char compression[2]; char timedate[4]; char crc32[4]; char compressed_size[4]; char uncompressed_size[4]; char filename_length[2]; char extra_length[2]; }; const char *compression_names[] = { "uncompressed", "shrinking", "reduced-1", "reduced-2", "reduced-3", "reduced-4", "imploded", "reserved", "deflation" }; static int archive_read_format_zip_bid(struct archive *); static int archive_read_format_zip_cleanup(struct archive *); static int archive_read_format_zip_read_data(struct archive *, const void **, size_t *, off_t *); +static int archive_read_format_zip_read_data_skip(struct archive *a); static int archive_read_format_zip_read_header(struct archive *, struct archive_entry *); static int i2(const char *); static int i4(const char *); +static unsigned int u2(const char *); +static unsigned int u4(const char *); +static uint64_t u8(const char *); static int zip_read_data_deflate(struct archive *a, const void **buff, size_t *size, off_t *offset); static int zip_read_data_none(struct archive *a, const void **buff, size_t *size, off_t *offset); -static int zip_read_data_skip(struct archive *a, const void **buff, - size_t *size, off_t *offset); +static int zip_read_file_header(struct archive *a, + struct archive_entry *entry, struct zip *zip); static time_t zip_time(const char *); +static void process_extra(const void* extra, struct zip* zip); int archive_read_support_format_zip(struct archive *a) { struct zip *zip; int r; zip = malloc(sizeof(*zip)); 
memset(zip, 0, sizeof(*zip)); r = __archive_read_register_format(a, zip, archive_read_format_zip_bid, archive_read_format_zip_read_header, archive_read_format_zip_read_data, + archive_read_format_zip_read_data_skip, archive_read_format_zip_cleanup); if (r != ARCHIVE_OK) free(zip); return (ARCHIVE_OK); } static int archive_read_format_zip_bid(struct archive *a) { int bytes_read; int bid = 0; const void *h; const char *p; if (a->archive_format == ARCHIVE_FORMAT_ZIP) bid += 1; bytes_read = (a->compression_read_ahead)(a, &h, 4); if (bytes_read < 4) return (-1); p = h; if (p[0] == 'P' && p[1] == 'K') { bid += 16; if (p[2] == '\001' && p[3] == '\002') bid += 16; else if (p[2] == '\003' && p[3] == '\004') bid += 16; else if (p[2] == '\005' && p[3] == '\006') bid += 16; else if (p[2] == '\007' && p[3] == '\010') bid += 16; } return (bid); } static int archive_read_format_zip_read_header(struct archive *a, struct archive_entry *entry) { int bytes_read; const void *h; - const struct zip_file_header *p; + const char *signature; struct zip *zip; a->archive_format = ARCHIVE_FORMAT_ZIP; if (a->archive_format_name == NULL) a->archive_format_name = "ZIP"; zip = *(a->pformat_data); + zip->decompress_init = 0; zip->end_of_entry = 0; - bytes_read = - (a->compression_read_ahead)(a, &h, sizeof(struct zip_file_header)); + zip->end_of_entry_cleanup = 0; + zip->entry_uncompressed_bytes_read = 0; + zip->entry_compressed_bytes_read = 0; + bytes_read = (a->compression_read_ahead)(a, &h, 4); if (bytes_read < 4) return (ARCHIVE_FATAL); - p = h; - if (p->signature[0] != 'P' || p->signature[1] != 'K') { + signature = h; + if (signature[0] != 'P' || signature[1] != 'K') { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Bad ZIP file"); return (ARCHIVE_FATAL); } - if (p->signature[2] == '\001' && p->signature[3] == '\002') { + if (signature[2] == '\001' && signature[3] == '\002') { /* Beginning of central directory. 
*/ return (ARCHIVE_EOF); - } else if (p->signature[2] == '\003' && p->signature[3] == '\004') { - /* Regular file entry; fall through. */ - } else if (p->signature[2] == '\005' && p->signature[3] == '\006') { + } + + if (signature[2] == '\003' && signature[3] == '\004') { + /* Regular file entry. */ + return (zip_read_file_header(a, entry, zip)); + } + + if (signature[2] == '\005' && signature[3] == '\006') { /* End-of-archive record. */ return (ARCHIVE_EOF); - } else if (p->signature[2] == '\007' && p->signature[3] == '\010') { - /* ??? Need to research this. ??? */ - } else { - archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, - "Damaged ZIP file or unsupported format variant (%d,%d)", p->signature[2], p->signature[3]); + } + + if (signature[2] == '\007' && signature[3] == '\010') { + /* + * We should never encounter this record here; + * see ZIP_LENGTH_AT_END handling below for details. + */ + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Bad ZIP file: Unexpected end-of-entry record"); return (ARCHIVE_FATAL); } + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Damaged ZIP file or unsupported format variant (%d,%d)", + signature[2], signature[3]); + return (ARCHIVE_FATAL); +} + +int +zip_read_file_header(struct archive *a, struct archive_entry *entry, + struct zip *zip) +{ + const struct zip_file_header *p; + const void *h; + int bytes_read; + struct stat st; + + bytes_read = + (a->compression_read_ahead)(a, &h, sizeof(struct zip_file_header)); if (bytes_read < (int)sizeof(struct zip_file_header)) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } + p = h; zip->version = p->version[0]; zip->system = p->version[1]; zip->flags = i2(p->flags); zip->compression = i2(p->compression); if (zip->compression < sizeof(compression_names)/sizeof(compression_names[0])) zip->compression_name = compression_names[zip->compression]; else zip->compression_name = "??"; zip->mtime = zip_time(p->timedate); + zip->ctime = 0; + 
zip->atime = 0; + zip->mode = 0; + zip->uid = 0; + zip->gid = 0; zip->crc32 = i4(p->crc32); zip->filename_length = i2(p->filename_length); zip->extra_length = i2(p->extra_length); - zip->uncompressed_size = i4(p->uncompressed_size); - zip->compressed_size = i4(p->compressed_size); + zip->uncompressed_size = u4(p->uncompressed_size); + zip->compressed_size = u4(p->compressed_size); (a->compression_read_consume)(a, sizeof(struct zip_file_header)); /* Read the filename. */ bytes_read = (a->compression_read_ahead)(a, &h, zip->filename_length); if (bytes_read < zip->filename_length) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } archive_string_ensure(&zip->pathname, zip->filename_length); archive_strncpy(&zip->pathname, h, zip->filename_length); (a->compression_read_consume)(a, zip->filename_length); archive_entry_set_pathname(entry, zip->pathname.s); + if (zip->pathname.s[archive_strlen(&zip->pathname) - 1] == '/') + zip->mode = S_IFDIR | 0777; + else + zip->mode = S_IFREG | 0777; + /* Read the extra data. */ bytes_read = (a->compression_read_ahead)(a, &h, zip->extra_length); if (bytes_read < zip->extra_length) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } - /* TODO: Store the extra data somewhere? 
*/ + process_extra(h, zip); (a->compression_read_consume)(a, zip->extra_length); /* Populate some additional entry fields: */ - archive_entry_set_mtime(entry, zip->mtime, 0); - if (zip->pathname.s[archive_strlen(&zip->pathname) - 1] == '/') - archive_entry_set_mode(entry, S_IFDIR | 0777); - else - archive_entry_set_mode(entry, S_IFREG | 0777); - archive_entry_set_size(entry, zip->uncompressed_size); + memset(&st, 0, sizeof(st)); + st.st_mode = zip->mode; + st.st_uid = zip->uid; + st.st_gid = zip->gid; + st.st_mtime = zip->mtime; + st.st_ctime = zip->ctime; + st.st_atime = zip->atime; + st.st_size = zip->uncompressed_size; + archive_entry_copy_stat(entry, &st); + zip->entry_bytes_remaining = zip->compressed_size; zip->entry_offset = 0; /* Set up a more descriptive format name. */ sprintf(zip->format_name, "ZIP %d.%d (%s)", zip->version / 10, zip->version % 10, zip->compression_name); a->archive_format_name = zip->format_name; return (ARCHIVE_OK); } /* Convert an MSDOS-style date/time into Unix-style time. */ static time_t zip_time(const char *p) { int msTime, msDate; struct tm ts; msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]); msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]); memset(&ts, 0, sizeof(ts)); ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ ts.tm_mday = msDate & 0x1f; /* Day of month. */ ts.tm_hour = (msTime >> 11) & 0x1f; ts.tm_min = (msTime >> 5) & 0x3f; ts.tm_sec = (msTime << 1) & 0x3e; ts.tm_isdst = -1; return mktime(&ts); } static int archive_read_format_zip_read_data(struct archive *a, const void **buff, size_t *size, off_t *offset) { int r; struct zip *zip; zip = *(a->pformat_data); - if (!zip->end_of_entry) { - switch(zip->compression) { - case 0: /* No compression. */ - r = zip_read_data_none(a, buff, size, offset); - break; - case 8: /* Deflate compression. 
*/ - r = zip_read_data_deflate(a, buff, size, offset); - break; - default: /* Unsupported compression. */ - r = zip_read_data_skip(a, buff, size, offset); - /* Return a warning. */ - archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, - "Unsupported ZIP compression method (%s)", - zip->compression_name); - r = ARCHIVE_WARN; - break; + /* + * If we hit end-of-entry last time, clean up and return + * ARCHIVE_EOF this time. + */ + if (zip->end_of_entry) { + if (!zip->end_of_entry_cleanup) { + if (zip->flags & ZIP_LENGTH_AT_END) { + const void *h; + const char *p; + int bytes_read = + (a->compression_read_ahead)(a, &h, 16); + if (bytes_read < 16) { + archive_set_error(a, + ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP end-of-file record"); + return (ARCHIVE_FATAL); + } + p = h; + zip->crc32 = i4(p + 4); + zip->compressed_size = u4(p + 8); + zip->uncompressed_size = u4(p + 12); + bytes_read = (a->compression_read_consume)(a, 16); + } + + /* Check file size, CRC against these values. */ + if (zip->compressed_size != zip->entry_compressed_bytes_read) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "ZIP compressed data is wrong size"); + return (ARCHIVE_WARN); + } + if (zip->uncompressed_size != zip->entry_uncompressed_bytes_read) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "ZIP uncompressed data is wrong size"); + return (ARCHIVE_WARN); + } +/* TODO: Compute CRC. */ +/* + if (zip->crc32 != zip->entry_crc32_calculated) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "ZIP data CRC error"); + return (ARCHIVE_WARN); + } +*/ + /* End-of-entry cleanup done. */ + zip->end_of_entry_cleanup = 1; } - } else { - r = ARCHIVE_EOF; + return (ARCHIVE_EOF); + } + + switch(zip->compression) { + case 0: /* No compression. */ + r = zip_read_data_none(a, buff, size, offset); + break; + case 8: /* Deflate compression. */ + r = zip_read_data_deflate(a, buff, size, offset); + break; + default: /* Unsupported compression. */ + *buff = NULL; + *size = 0; + *offset = 0; + /* Return a warning. 
*/ + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Unsupported ZIP compression method (%s)", + zip->compression_name); if (zip->flags & ZIP_LENGTH_AT_END) { - /* TODO: Read the "PK\007\008" trailer that follows. */ + /* + * ZIP_LENGTH_AT_END requires us to + * decompress the entry in order to + * skip it, but we don't know this + * compression method, so we give up. + */ + r = ARCHIVE_FATAL; + } else { + /* We know compressed size; just skip it. */ + archive_read_format_zip_read_data_skip(a); + r = ARCHIVE_WARN; } + break; } - if (r == ARCHIVE_EOF) - zip->end_of_entry = 1; return (r); } +/* + * Read "uncompressed" data. According to the current specification, + * if ZIP_LENGTH_AT_END is specified, then the size fields in the + * initial file header are supposed to be set to zero. This would, of + * course, make it impossible for us to read the archive, since we + * couldn't determine the end of the file data. Info-ZIP seems to + * include the real size fields both before and after the data in this + * case (the CRC only appears afterwards), so this works as you would + * expect. + * + * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets + * zip->end_of_entry if it consumes all of the data. + */ static int zip_read_data_none(struct archive *a, const void **buff, size_t *size, off_t *offset) { struct zip *zip; ssize_t bytes_avail; zip = *(a->pformat_data); if (zip->entry_bytes_remaining == 0) { *buff = NULL; *size = 0; *offset = zip->entry_offset; - return (ARCHIVE_EOF); + zip->end_of_entry = 1; + return (ARCHIVE_OK); } /* * Note: '1' here is a performance optimization. * Recall that the decompression layer returns a count of * available bytes; asking for more than that forces the * decompressor to combine reads by copying data. 
*/ bytes_avail = (a->compression_read_ahead)(a, buff, 1); if (bytes_avail <= 0) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } if (bytes_avail > zip->entry_bytes_remaining) bytes_avail = zip->entry_bytes_remaining; (a->compression_read_consume)(a, bytes_avail); *size = bytes_avail; *offset = zip->entry_offset; zip->entry_offset += *size; zip->entry_bytes_remaining -= *size; + zip->entry_uncompressed_bytes_read += *size; + zip->entry_compressed_bytes_read += *size; return (ARCHIVE_OK); } #ifdef HAVE_ZLIB_H static int zip_read_data_deflate(struct archive *a, const void **buff, size_t *size, off_t *offset) { struct zip *zip; ssize_t bytes_avail; const void *compressed_buff; int r; zip = *(a->pformat_data); /* If the buffer hasn't been allocated, allocate it now. */ if (zip->uncompressed_buffer == NULL) { zip->uncompressed_buffer_size = 32 * 1024; zip->uncompressed_buffer = malloc(zip->uncompressed_buffer_size); if (zip->uncompressed_buffer == NULL) { archive_set_error(a, ENOMEM, "No memory for ZIP decompression"); return (ARCHIVE_FATAL); } } /* If we haven't yet read any data, initialize the decompressor. */ - if (zip->entry_bytes_remaining == zip->compressed_size) { + if (!zip->decompress_init) { r = inflateInit2(&zip->stream, -15 /* Don't check for zlib header */); if (r != Z_OK) { archive_set_error(a, ARCHIVE_ERRNO_MISC, "Can't initialize ZIP decompression."); return (ARCHIVE_FATAL); } + zip->decompress_init = 1; } /* * Note: '1' here is a performance optimization. * Recall that the decompression layer returns a count of * available bytes; asking for more than that forces the * decompressor to combine reads by copying data. 
*/ bytes_avail = (a->compression_read_ahead)(a, &compressed_buff, 1); if (bytes_avail <= 0) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file body"); return (ARCHIVE_FATAL); } - if (bytes_avail > zip->entry_bytes_remaining) - bytes_avail = zip->entry_bytes_remaining; /* * A bug in zlib.h: stream.next_in should be marked 'const' * but isn't (the library never alters data through the * next_in pointer, only reads it). The result: this ugly * cast to remove 'const'. */ zip->stream.next_in = (void *)(uintptr_t)(const void *)compressed_buff; zip->stream.avail_in = bytes_avail; zip->stream.total_in = 0; zip->stream.next_out = zip->uncompressed_buffer; zip->stream.avail_out = zip->uncompressed_buffer_size; zip->stream.total_out = 0; r = inflate(&zip->stream, 0); switch (r) { case Z_OK: break; case Z_STREAM_END: zip->end_of_entry = 1; break; case Z_MEM_ERROR: archive_set_error(a, ENOMEM, "Out of memory for ZIP decompression"); return (ARCHIVE_FATAL); default: archive_set_error(a, ARCHIVE_ERRNO_MISC, "ZIP decompression failed (%d)", r); return (ARCHIVE_FATAL); } /* Consume as much as the compressor actually used. 
*/ bytes_avail = zip->stream.total_in; (a->compression_read_consume)(a, bytes_avail); zip->entry_bytes_remaining -= bytes_avail; + zip->entry_compressed_bytes_read += bytes_avail; - *offset = zip->entry_offset; *size = zip->stream.total_out; + zip->entry_uncompressed_bytes_read += *size; *buff = zip->uncompressed_buffer; zip->entry_offset += *size; return (ARCHIVE_OK); } #else static int zip_read_data_deflate(struct archive *a, const void **buff, size_t *size, off_t *offset) { int r; - r = zip_read_data_skip(a, buff, size, offset); + *buff = NULL; + *size = 0; + *offset = 0; archive_set_error(a, ARCHIVE_ERRNO_MISC, "libarchive compiled without deflate support (no libz)"); - return (ARCHIVE_WARN); + return (ARCHIVE_FATAL); } #endif static int -zip_read_data_skip(struct archive *a, const void **buff, - size_t *size, off_t *offset) +archive_read_format_zip_read_data_skip(struct archive *a) { struct zip *zip; + const void *buff = NULL; ssize_t bytes_avail; zip = *(a->pformat_data); - /* Return nothing gracefully. */ - *buff = NULL; - *size = 0; - *offset = 0; - zip->end_of_entry = 1; + /* + * If the length is at the end, we have no choice but + * to decompress all the data to find the end marker. + */ + if (zip->flags & ZIP_LENGTH_AT_END) { + ssize_t size; + off_t offset; + int r; + do { + r = archive_read_format_zip_read_data(a, &buff, + &size, &offset); + } while (r == ARCHIVE_OK); + return (r); + } - /* Skip body of entry. */ + /* + * If the length is at the beginning, we can skip the + * compressed data much more quickly. 
+ */ while (zip->entry_bytes_remaining > 0) { - bytes_avail = (a->compression_read_ahead)(a, buff, 1); + bytes_avail = (a->compression_read_ahead)(a, &buff, 1); if (bytes_avail <= 0) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file body"); return (ARCHIVE_FATAL); } if (bytes_avail > zip->entry_bytes_remaining) bytes_avail = zip->entry_bytes_remaining; (a->compression_read_consume)(a, bytes_avail); zip->entry_bytes_remaining -= bytes_avail; } + /* This entry is finished and done. */ + zip->end_of_entry_cleanup = zip->end_of_entry = 1; return (ARCHIVE_OK); } static int archive_read_format_zip_cleanup(struct archive *a) { struct zip *zip; zip = *(a->pformat_data); if (zip->uncompressed_buffer != NULL) free(zip->uncompressed_buffer); archive_string_free(&(zip->pathname)); archive_string_free(&(zip->extra)); free(zip); *(a->pformat_data) = NULL; return (ARCHIVE_OK); } static int i2(const char *p) { return ((0xff & (int)p[0]) + 256 * (0xff & (int)p[1])); } static int i4(const char *p) { return ((0xffff & i2(p)) + 0x10000 * (0xffff & i2(p+2))); +} + +static unsigned int +u2(const char *p) +{ + return ((0xff & (unsigned int)p[0]) + 256 * (0xff & (unsigned int)p[1])); +} + +static unsigned int +u4(const char *p) +{ + return u2(p) + 0x10000 * u2(p+2); +} + +static uint64_t +u8(const char *p) +{ + return u4(p) + 0x100000000LL * u4(p+4); +} + +/* + * The extra data is stored as a list of + * id1+size1+data1 + id2+size2+data2 ... + * triplets. id and size are 2 bytes each. 
+ */
+static void
+process_extra(const void* extra, struct zip* zip)
+{
+	int offset = 0;
+	const char *p = extra;
+	/* Each field needs at least its 4-byte id+size header. */
+	while (offset + 4 <= zip->extra_length)
+	{
+		unsigned short headerid = u2(p + offset);
+		unsigned short datasize = u2(p + offset + 2);
+		offset += 4;
+		/* Stop if the field claims more data than we were given. */
+		if (offset + datasize > zip->extra_length)
+			break;
+#ifdef DEBUG
+		fprintf(stderr, "Header id 0x%04x, length %d\n",
+		    headerid, datasize);
+#endif
+		switch (headerid) {
+		case 0x0001:
+			/* Zip64 extended information extra field. */
+			if (datasize >= 8)
+				zip->uncompressed_size = u8(p + offset);
+			if (datasize >= 16)
+				zip->compressed_size = u8(p + offset + 8);
+			break;
+		case 0x5455:
+		{
+			/* Extended time field "UT". */
+			int flags;
+			/*
+			 * An empty field has no flags byte; without this
+			 * check the decrement below would wrap datasize
+			 * around and defeat the size checks that follow.
+			 */
+			if (datasize < 1)
+				break;
+			flags = p[offset];
+			offset++;
+			datasize--;
+			/* Flag bits indicate which dates are present. */
+			if (flags & 0x01)
+			{
+				if (datasize < 4)
+					break;
+#ifdef DEBUG
+				fprintf(stderr, "mtime: %d -> %d\n",
+				    zip->mtime, i4(p + offset));
+#endif
+				zip->mtime = i4(p + offset);
+				offset += 4;
+				datasize -= 4;
+			}
+			if (flags & 0x02)
+			{
+				if (datasize < 4)
+					break;
+				zip->atime = i4(p + offset);
+				offset += 4;
+				datasize -= 4;
+			}
+			if (flags & 0x04)
+			{
+				if (datasize < 4)
+					break;
+				zip->ctime = i4(p + offset);
+				offset += 4;
+				datasize -= 4;
+			}
+			break;
+		}
+		case 0x7855:
+			/* Info-ZIP Unix Extra Field (type 2) "Ux". */
+#ifdef DEBUG
+			if (datasize >= 4)
+				fprintf(stderr, "uid %d gid %d\n",
+				    i2(p + offset), i2(p + offset + 2));
+#endif
+			if (datasize >= 2)
+				zip->uid = i2(p + offset);
+			if (datasize >= 4)
+				zip->gid = i2(p + offset + 2);
+			break;
+		default:
+			break;
+		}
+		/* Skip whatever part of the field was not consumed above. */
+		offset += datasize;
+	}
+#ifdef DEBUG
+	if (offset != zip->extra_length)
+	{
+		fprintf(stderr,
+		    "Extra data field contents do not match reported size!");
+	}
+#endif
 }