diff --git a/contrib/libarchive/libarchive/archive_read_disk_posix.c b/contrib/libarchive/libarchive/archive_read_disk_posix.c index d0e1f35c8291..2b39e672b49c 100644 --- a/contrib/libarchive/libarchive/archive_read_disk_posix.c +++ b/contrib/libarchive/libarchive/archive_read_disk_posix.c @@ -1,2726 +1,2731 @@ /*- * Copyright (c) 2003-2009 Tim Kientzle * Copyright (c) 2010-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* This is the tree-walking code for POSIX systems. */ #if !defined(_WIN32) || defined(__CYGWIN__) #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_SYS_PARAM_H #include #endif #ifdef HAVE_SYS_MOUNT_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_SYS_STATFS_H #include #endif #ifdef HAVE_SYS_STATVFS_H #include #endif #ifdef HAVE_SYS_TIME_H #include #endif #ifdef HAVE_LINUX_MAGIC_H #include #endif #ifdef HAVE_LINUX_FS_H #include #endif /* * Some Linux distributions have both linux/ext2_fs.h and ext2fs/ext2_fs.h. * As the include guards don't agree, the order of include is important. */ #ifdef HAVE_LINUX_EXT2_FS_H #include /* for Linux file flags */ #endif #if defined(HAVE_EXT2FS_EXT2_FS_H) && !defined(__CYGWIN__) #include /* Linux file flags, broken on Cygwin */ #endif #ifdef HAVE_DIRECT_H #include #endif #ifdef HAVE_DIRENT_H #include #endif #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_FCNTL_H #include #endif #ifdef HAVE_LIMITS_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_SYS_IOCTL_H #include #endif #include "archive.h" #include "archive_string.h" #include "archive_entry.h" #include "archive_private.h" #include "archive_read_disk_private.h" #ifndef HAVE_FCHDIR #error fchdir function required. #endif #ifndef O_BINARY #define O_BINARY 0 #endif #ifndef O_CLOEXEC #define O_CLOEXEC 0 #endif +#if defined(__hpux) && !defined(HAVE_DIRFD) +#define dirfd(x) ((x)->__dd_fd) +#define HAVE_DIRFD +#endif + /*- * This is a new directory-walking system that addresses a number * of problems I've had with fts(3). In particular, it has no * pathname-length limits (other than the size of 'int'), handles * deep logical traversals, uses considerably less memory, and has * an opaque interface (easier to modify in the future). * * Internally, it keeps a single list of "tree_entry" items that * represent filesystem objects that require further attention. * Non-directories are not kept in memory: they are pulled from * readdir(), returned to the client, then freed as soon as possible. * Any directory entry to be traversed gets pushed onto the stack. * * There is surprisingly little information that needs to be kept for * each item on the stack. Just the name, depth (represented here as the * string length of the parent directory's pathname), and some markers * indicating how to get back to the parent (via chdir("..") for a * regular dir or via fchdir(2) for a symlink). */ /* * TODO: * 1) Loop checking. * 3) Arbitrary logical traversals by closing/reopening intermediate fds. */ struct restore_time { const char *name; time_t mtime; long mtime_nsec; time_t atime; long atime_nsec; mode_t filetype; int noatime; }; struct tree_entry { int depth; struct tree_entry *next; struct tree_entry *parent; struct archive_string name; size_t dirname_length; int64_t dev; int64_t ino; int flags; int filesystem_id; /* How to return back to the parent of a symlink. */ int symlink_parent_fd; /* How to restore time of a directory. */ struct restore_time restore_time; }; struct filesystem { int64_t dev; int synthetic; int remote; int noatime; #if defined(USE_READDIR_R) size_t name_max; #endif long incr_xfer_size; long max_xfer_size; long min_xfer_size; long xfer_align; /* * Buffer used for reading file contents. */ /* Exactly allocated memory pointer. */ unsigned char *allocation_ptr; /* Pointer adjusted to the filesystem alignment . */ unsigned char *buff; size_t buff_size; }; /* Definitions for tree_entry.flags bitmap. */ #define isDir 1 /* This entry is a regular directory. */ #define isDirLink 2 /* This entry is a symbolic link to a directory. */ #define needsFirstVisit 4 /* This is an initial entry. */ #define needsDescent 8 /* This entry needs to be previsited. */ #define needsOpen 16 /* This is a directory that needs to be opened. */ #define needsAscent 32 /* This entry needs to be postvisited. */ /* * Local data for this package. */ struct tree { struct tree_entry *stack; struct tree_entry *current; DIR *d; #define INVALID_DIR_HANDLE NULL struct dirent *de; #if defined(USE_READDIR_R) struct dirent *dirent; size_t dirent_allocated; #endif int flags; int visit_type; /* Error code from last failed operation. */ int tree_errno; /* Dynamically-sized buffer for holding path */ struct archive_string path; /* Last path element */ const char *basename; /* Leading dir length */ size_t dirname_length; int depth; int openCount; int maxOpenCount; int initial_dir_fd; int working_dir_fd; struct stat lst; struct stat st; int descend; int nlink; /* How to restore time of a file. */ struct restore_time restore_time; struct entry_sparse { int64_t length; int64_t offset; } *sparse_list, *current_sparse; int sparse_count; int sparse_list_size; char initial_symlink_mode; char symlink_mode; struct filesystem *current_filesystem; struct filesystem *filesystem_table; int initial_filesystem_id; int current_filesystem_id; int max_filesystem_id; int allocated_filesystem; int entry_fd; int entry_eof; int64_t entry_remaining_bytes; int64_t entry_total; unsigned char *entry_buff; size_t entry_buff_size; }; /* Definitions for tree.flags bitmap. */ #define hasStat 16 /* The st entry is valid. */ #define hasLstat 32 /* The lst entry is valid. */ #define onWorkingDir 64 /* We are on the working dir where we are * reading directory entry at this time. */ #define needsRestoreTimes 128 #define onInitialDir 256 /* We are on the initial dir. */ static int tree_dir_next_posix(struct tree *t); #ifdef HAVE_DIRENT_D_NAMLEN /* BSD extension; avoids need for a strlen() call. */ #define D_NAMELEN(dp) (dp)->d_namlen #else #define D_NAMELEN(dp) (strlen((dp)->d_name)) #endif /* Initiate/terminate a tree traversal. */ static struct tree *tree_open(const char *, int, int); static struct tree *tree_reopen(struct tree *, const char *, int); static void tree_close(struct tree *); static void tree_free(struct tree *); static void tree_push(struct tree *, const char *, int, int64_t, int64_t, struct restore_time *); static int tree_enter_initial_dir(struct tree *); static int tree_enter_working_dir(struct tree *); static int tree_current_dir_fd(struct tree *); /* * tree_next() returns Zero if there is no next entry, non-zero if * there is. Note that directories are visited three times. * Directories are always visited first as part of enumerating their * parent; that is a "regular" visit. If tree_descend() is invoked at * that time, the directory is added to a work list and will * subsequently be visited two more times: once just after descending * into the directory ("postdescent") and again just after ascending * back to the parent ("postascent"). * * TREE_ERROR_DIR is returned if the descent failed (because the * directory couldn't be opened, for instance). This is returned * instead of TREE_POSTDESCENT/TREE_POSTASCENT. TREE_ERROR_DIR is not a * fatal error, but it does imply that the relevant subtree won't be * visited. TREE_ERROR_FATAL is returned for an error that left the * traversal completely hosed. Right now, this is only returned for * chdir() failures during ascent. */ #define TREE_REGULAR 1 #define TREE_POSTDESCENT 2 #define TREE_POSTASCENT 3 #define TREE_ERROR_DIR -1 #define TREE_ERROR_FATAL -2 static int tree_next(struct tree *); /* * Return information about the current entry. */ /* * The current full pathname, length of the full pathname, and a name * that can be used to access the file. Because tree does use chdir * extensively, the access path is almost never the same as the full * current path. * * TODO: On platforms that support it, use openat()-style operations * to eliminate the chdir() operations entirely while still supporting * arbitrarily deep traversals. This makes access_path troublesome to * support, of course, which means we'll need a rich enough interface * that clients can function without it. (In particular, we'll need * tree_current_open() that returns an open file descriptor.) * */ static const char *tree_current_path(struct tree *); static const char *tree_current_access_path(struct tree *); /* * Request the lstat() or stat() data for the current path. Since the * tree package needs to do some of this anyway, and caches the * results, you should take advantage of it here if you need it rather * than make a redundant stat() or lstat() call of your own. */ static const struct stat *tree_current_stat(struct tree *); static const struct stat *tree_current_lstat(struct tree *); static int tree_current_is_symblic_link_target(struct tree *); /* The following functions use tricks to avoid a certain number of * stat()/lstat() calls. */ /* "is_physical_dir" is equivalent to S_ISDIR(tree_current_lstat()->st_mode) */ static int tree_current_is_physical_dir(struct tree *); /* "is_dir" is equivalent to S_ISDIR(tree_current_stat()->st_mode) */ static int tree_current_is_dir(struct tree *); static int update_current_filesystem(struct archive_read_disk *a, int64_t dev); static int setup_current_filesystem(struct archive_read_disk *); static int tree_target_is_same_as_parent(struct tree *, const struct stat *); static int _archive_read_disk_open(struct archive *, const char *); static int _archive_read_free(struct archive *); static int _archive_read_close(struct archive *); static int _archive_read_data_block(struct archive *, const void **, size_t *, int64_t *); static int _archive_read_next_header(struct archive *, struct archive_entry **); static int _archive_read_next_header2(struct archive *, struct archive_entry *); static const char *trivial_lookup_gname(void *, int64_t gid); static const char *trivial_lookup_uname(void *, int64_t uid); static int setup_sparse(struct archive_read_disk *, struct archive_entry *); static int close_and_restore_time(int fd, struct tree *, struct restore_time *); static int open_on_current_dir(struct tree *, const char *, int); static int tree_dup(int); static const struct archive_vtable archive_read_disk_vtable = { .archive_free = _archive_read_free, .archive_close = _archive_read_close, .archive_read_data_block = _archive_read_data_block, .archive_read_next_header = _archive_read_next_header, .archive_read_next_header2 = _archive_read_next_header2, }; const char * archive_read_disk_gname(struct archive *_a, la_int64_t gid) { struct archive_read_disk *a = (struct archive_read_disk *)_a; if (ARCHIVE_OK != __archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_gname")) return (NULL); if (a->lookup_gname == NULL) return (NULL); return ((*a->lookup_gname)(a->lookup_gname_data, gid)); } const char * archive_read_disk_uname(struct archive *_a, la_int64_t uid) { struct archive_read_disk *a = (struct archive_read_disk *)_a; if (ARCHIVE_OK != __archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_uname")) return (NULL); if (a->lookup_uname == NULL) return (NULL); return ((*a->lookup_uname)(a->lookup_uname_data, uid)); } int archive_read_disk_set_gname_lookup(struct archive *_a, void *private_data, const char * (*lookup_gname)(void *private, la_int64_t gid), void (*cleanup_gname)(void *private)) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(&a->archive, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_set_gname_lookup"); if (a->cleanup_gname != NULL && a->lookup_gname_data != NULL) (a->cleanup_gname)(a->lookup_gname_data); a->lookup_gname = lookup_gname; a->cleanup_gname = cleanup_gname; a->lookup_gname_data = private_data; return (ARCHIVE_OK); } int archive_read_disk_set_uname_lookup(struct archive *_a, void *private_data, const char * (*lookup_uname)(void *private, la_int64_t uid), void (*cleanup_uname)(void *private)) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(&a->archive, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_set_uname_lookup"); if (a->cleanup_uname != NULL && a->lookup_uname_data != NULL) (a->cleanup_uname)(a->lookup_uname_data); a->lookup_uname = lookup_uname; a->cleanup_uname = cleanup_uname; a->lookup_uname_data = private_data; return (ARCHIVE_OK); } /* * Create a new archive_read_disk object and initialize it with global state. */ struct archive * archive_read_disk_new(void) { struct archive_read_disk *a; a = (struct archive_read_disk *)calloc(1, sizeof(*a)); if (a == NULL) return (NULL); a->archive.magic = ARCHIVE_READ_DISK_MAGIC; a->archive.state = ARCHIVE_STATE_NEW; a->archive.vtable = &archive_read_disk_vtable; a->entry = archive_entry_new2(&a->archive); a->lookup_uname = trivial_lookup_uname; a->lookup_gname = trivial_lookup_gname; a->flags = ARCHIVE_READDISK_MAC_COPYFILE; a->open_on_current_dir = open_on_current_dir; a->tree_current_dir_fd = tree_current_dir_fd; a->tree_enter_working_dir = tree_enter_working_dir; return (&a->archive); } static int _archive_read_free(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; int r; if (_a == NULL) return (ARCHIVE_OK); archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY | ARCHIVE_STATE_FATAL, "archive_read_free"); if (a->archive.state != ARCHIVE_STATE_CLOSED) r = _archive_read_close(&a->archive); else r = ARCHIVE_OK; tree_free(a->tree); if (a->cleanup_gname != NULL && a->lookup_gname_data != NULL) (a->cleanup_gname)(a->lookup_gname_data); if (a->cleanup_uname != NULL && a->lookup_uname_data != NULL) (a->cleanup_uname)(a->lookup_uname_data); archive_string_free(&a->archive.error_string); archive_entry_free(a->entry); a->archive.magic = 0; __archive_clean(&a->archive); free(a); return (r); } static int _archive_read_close(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY | ARCHIVE_STATE_FATAL, "archive_read_close"); if (a->archive.state != ARCHIVE_STATE_FATAL) a->archive.state = ARCHIVE_STATE_CLOSED; tree_close(a->tree); return (ARCHIVE_OK); } static void setup_symlink_mode(struct archive_read_disk *a, char symlink_mode, int follow_symlinks) { a->symlink_mode = symlink_mode; a->follow_symlinks = follow_symlinks; if (a->tree != NULL) { a->tree->initial_symlink_mode = a->symlink_mode; a->tree->symlink_mode = a->symlink_mode; } } int archive_read_disk_set_symlink_logical(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_set_symlink_logical"); setup_symlink_mode(a, 'L', 1); return (ARCHIVE_OK); } int archive_read_disk_set_symlink_physical(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_set_symlink_physical"); setup_symlink_mode(a, 'P', 0); return (ARCHIVE_OK); } int archive_read_disk_set_symlink_hybrid(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_set_symlink_hybrid"); setup_symlink_mode(a, 'H', 1);/* Follow symlinks initially. */ return (ARCHIVE_OK); } int archive_read_disk_set_atime_restored(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_restore_atime"); #ifdef HAVE_UTIMES a->flags |= ARCHIVE_READDISK_RESTORE_ATIME; if (a->tree != NULL) a->tree->flags |= needsRestoreTimes; return (ARCHIVE_OK); #else /* Display warning and unset flag */ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Cannot restore access time on this system"); a->flags &= ~ARCHIVE_READDISK_RESTORE_ATIME; return (ARCHIVE_WARN); #endif } int archive_read_disk_set_behavior(struct archive *_a, int flags) { struct archive_read_disk *a = (struct archive_read_disk *)_a; int r = ARCHIVE_OK; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_honor_nodump"); a->flags = flags; if (flags & ARCHIVE_READDISK_RESTORE_ATIME) r = archive_read_disk_set_atime_restored(_a); else { if (a->tree != NULL) a->tree->flags &= ~needsRestoreTimes; } return (r); } /* * Trivial implementations of gname/uname lookup functions. * These are normally overridden by the client, but these stub * versions ensure that we always have something that works. */ static const char * trivial_lookup_gname(void *private_data, int64_t gid) { (void)private_data; /* UNUSED */ (void)gid; /* UNUSED */ return (NULL); } static const char * trivial_lookup_uname(void *private_data, int64_t uid) { (void)private_data; /* UNUSED */ (void)uid; /* UNUSED */ return (NULL); } /* * Allocate memory for the reading buffer adjusted to the filesystem * alignment. */ static int setup_suitable_read_buffer(struct archive_read_disk *a) { struct tree *t = a->tree; struct filesystem *cf = t->current_filesystem; size_t asize; size_t s; if (cf->allocation_ptr == NULL) { /* If we couldn't get a filesystem alignment, * we use 4096 as default value but we won't use * O_DIRECT to open() and openat() operations. */ long xfer_align = (cf->xfer_align == -1)?4096:cf->xfer_align; if (cf->max_xfer_size != -1) asize = cf->max_xfer_size + xfer_align; else { long incr = cf->incr_xfer_size; /* Some platform does not set a proper value to * incr_xfer_size.*/ if (incr < 0) incr = cf->min_xfer_size; if (cf->min_xfer_size < 0) { incr = xfer_align; asize = xfer_align; } else asize = cf->min_xfer_size; /* Increase a buffer size up to 64K bytes in * a proper increment size. */ while (asize < 1024*64) asize += incr; /* Take a margin to adjust to the filesystem * alignment. */ asize += xfer_align; } cf->allocation_ptr = malloc(asize); if (cf->allocation_ptr == NULL) { archive_set_error(&a->archive, ENOMEM, "Couldn't allocate memory"); a->archive.state = ARCHIVE_STATE_FATAL; return (ARCHIVE_FATAL); } /* * Calculate proper address for the filesystem. */ s = (uintptr_t)cf->allocation_ptr; s %= xfer_align; if (s > 0) s = xfer_align - s; /* * Set a read buffer pointer in the proper alignment of * the current filesystem. */ cf->buff = cf->allocation_ptr + s; cf->buff_size = asize - xfer_align; } return (ARCHIVE_OK); } static int _archive_read_data_block(struct archive *_a, const void **buff, size_t *size, int64_t *offset) { struct archive_read_disk *a = (struct archive_read_disk *)_a; struct tree *t = a->tree; int r; ssize_t bytes; int64_t sparse_bytes; size_t buffbytes; int empty_sparse_region = 0; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_block"); if (t->entry_eof || t->entry_remaining_bytes <= 0) { r = ARCHIVE_EOF; goto abort_read_data; } /* * Open the current file. */ if (t->entry_fd < 0) { int flags = O_RDONLY | O_BINARY | O_CLOEXEC; /* * Eliminate or reduce cache effects if we can. * * Carefully consider this to be enabled. */ #if defined(O_DIRECT) && 0/* Disabled for now */ if (t->current_filesystem->xfer_align != -1 && t->nlink == 1) flags |= O_DIRECT; #endif #if defined(O_NOATIME) /* * Linux has O_NOATIME flag; use it if we need. */ if ((t->flags & needsRestoreTimes) != 0 && t->restore_time.noatime == 0) flags |= O_NOATIME; #endif t->entry_fd = open_on_current_dir(t, tree_current_access_path(t), flags); __archive_ensure_cloexec_flag(t->entry_fd); #if defined(O_NOATIME) /* * When we did open the file with O_NOATIME flag, * if successful, set 1 to t->restore_time.noatime * not to restore an atime of the file later. * if failed by EPERM, retry it without O_NOATIME flag. */ if (flags & O_NOATIME) { if (t->entry_fd >= 0) t->restore_time.noatime = 1; else if (errno == EPERM) flags &= ~O_NOATIME; } #endif if (t->entry_fd < 0) { archive_set_error(&a->archive, errno, "Couldn't open %s", tree_current_path(t)); r = ARCHIVE_FAILED; tree_enter_initial_dir(t); goto abort_read_data; } tree_enter_initial_dir(t); } /* * Allocate read buffer if not allocated. */ if (t->current_filesystem->allocation_ptr == NULL) { r = setup_suitable_read_buffer(a); if (r != ARCHIVE_OK) { a->archive.state = ARCHIVE_STATE_FATAL; goto abort_read_data; } } t->entry_buff = t->current_filesystem->buff; t->entry_buff_size = t->current_filesystem->buff_size; buffbytes = t->entry_buff_size; if ((int64_t)buffbytes > t->current_sparse->length) buffbytes = t->current_sparse->length; if (t->current_sparse->length == 0) empty_sparse_region = 1; /* * Skip hole. * TODO: Should we consider t->current_filesystem->xfer_align? */ if (t->current_sparse->offset > t->entry_total) { if (lseek(t->entry_fd, (off_t)t->current_sparse->offset, SEEK_SET) < 0) { archive_set_error(&a->archive, errno, "Seek error"); r = ARCHIVE_FATAL; a->archive.state = ARCHIVE_STATE_FATAL; goto abort_read_data; } sparse_bytes = t->current_sparse->offset - t->entry_total; t->entry_remaining_bytes -= sparse_bytes; t->entry_total += sparse_bytes; } /* * Read file contents. */ if (buffbytes > 0) { bytes = read(t->entry_fd, t->entry_buff, buffbytes); if (bytes < 0) { archive_set_error(&a->archive, errno, "Read error"); r = ARCHIVE_FATAL; a->archive.state = ARCHIVE_STATE_FATAL; goto abort_read_data; } } else bytes = 0; /* * Return an EOF unless we've read a leading empty sparse region, which * is used to represent fully-sparse files. */ if (bytes == 0 && !empty_sparse_region) { /* Get EOF */ t->entry_eof = 1; r = ARCHIVE_EOF; goto abort_read_data; } *buff = t->entry_buff; *size = bytes; *offset = t->entry_total; t->entry_total += bytes; t->entry_remaining_bytes -= bytes; if (t->entry_remaining_bytes == 0) { /* Close the current file descriptor */ close_and_restore_time(t->entry_fd, t, &t->restore_time); t->entry_fd = -1; t->entry_eof = 1; } t->current_sparse->offset += bytes; t->current_sparse->length -= bytes; if (t->current_sparse->length == 0 && !t->entry_eof) t->current_sparse++; return (ARCHIVE_OK); abort_read_data: *buff = NULL; *size = 0; *offset = t->entry_total; if (t->entry_fd >= 0) { /* Close the current file descriptor */ close_and_restore_time(t->entry_fd, t, &t->restore_time); t->entry_fd = -1; } return (r); } static int next_entry(struct archive_read_disk *a, struct tree *t, struct archive_entry *entry) { const struct stat *st; /* info to use for this entry */ const struct stat *lst;/* lstat() information */ const char *name; int delayed, delayed_errno, descend, r; struct archive_string delayed_str; delayed = ARCHIVE_OK; delayed_errno = 0; archive_string_init(&delayed_str); st = NULL; lst = NULL; t->descend = 0; do { switch (tree_next(t)) { case TREE_ERROR_FATAL: archive_set_error(&a->archive, t->tree_errno, "%s: Unable to continue traversing directory tree", tree_current_path(t)); a->archive.state = ARCHIVE_STATE_FATAL; tree_enter_initial_dir(t); return (ARCHIVE_FATAL); case TREE_ERROR_DIR: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "%s: Couldn't visit directory", tree_current_path(t)); tree_enter_initial_dir(t); return (ARCHIVE_FAILED); case 0: tree_enter_initial_dir(t); return (ARCHIVE_EOF); case TREE_POSTDESCENT: case TREE_POSTASCENT: break; case TREE_REGULAR: lst = tree_current_lstat(t); if (lst == NULL) { if (errno == ENOENT && t->depth > 0) { delayed = ARCHIVE_WARN; delayed_errno = errno; if (delayed_str.length == 0) { archive_string_sprintf(&delayed_str, "%s", tree_current_path(t)); } else { archive_string_sprintf(&delayed_str, " %s", tree_current_path(t)); } } else { archive_set_error(&a->archive, errno, "%s: Cannot stat", tree_current_path(t)); tree_enter_initial_dir(t); return (ARCHIVE_FAILED); } } break; } } while (lst == NULL); #ifdef __APPLE__ if (a->flags & ARCHIVE_READDISK_MAC_COPYFILE) { /* If we're using copyfile(), ignore "._XXX" files. */ const char *bname = strrchr(tree_current_path(t), '/'); if (bname == NULL) bname = tree_current_path(t); else ++bname; if (bname[0] == '.' && bname[1] == '_') return (ARCHIVE_RETRY); } #endif archive_entry_copy_pathname(entry, tree_current_path(t)); /* * Perform path matching. */ if (a->matching) { r = archive_match_path_excluded(a->matching, entry); if (r < 0) { archive_set_error(&(a->archive), errno, "Failed : %s", archive_error_string(a->matching)); return (r); } if (r) { if (a->excluded_cb_func) a->excluded_cb_func(&(a->archive), a->excluded_cb_data, entry); return (ARCHIVE_RETRY); } } /* * Distinguish 'L'/'P'/'H' symlink following. */ switch(t->symlink_mode) { case 'H': /* 'H': After the first item, rest like 'P'. */ t->symlink_mode = 'P'; /* 'H': First item (from command line) like 'L'. */ /* FALLTHROUGH */ case 'L': /* 'L': Do descend through a symlink to dir. */ descend = tree_current_is_dir(t); /* 'L': Follow symlinks to files. */ a->symlink_mode = 'L'; a->follow_symlinks = 1; /* 'L': Archive symlinks as targets, if we can. */ st = tree_current_stat(t); if (st != NULL && !tree_target_is_same_as_parent(t, st)) break; /* If stat fails, we have a broken symlink; * in that case, don't follow the link. */ /* FALLTHROUGH */ default: /* 'P': Don't descend through a symlink to dir. */ descend = tree_current_is_physical_dir(t); /* 'P': Don't follow symlinks to files. */ a->symlink_mode = 'P'; a->follow_symlinks = 0; /* 'P': Archive symlinks as symlinks. */ st = lst; break; } if (update_current_filesystem(a, st->st_dev) != ARCHIVE_OK) { a->archive.state = ARCHIVE_STATE_FATAL; tree_enter_initial_dir(t); return (ARCHIVE_FATAL); } if (t->initial_filesystem_id == -1) t->initial_filesystem_id = t->current_filesystem_id; if (a->flags & ARCHIVE_READDISK_NO_TRAVERSE_MOUNTS) { if (t->initial_filesystem_id != t->current_filesystem_id) descend = 0; } t->descend = descend; /* * Honor nodump flag. * If the file is marked with nodump flag, do not return this entry. */ if (a->flags & ARCHIVE_READDISK_HONOR_NODUMP) { #if defined(HAVE_STRUCT_STAT_ST_FLAGS) && defined(UF_NODUMP) if (st->st_flags & UF_NODUMP) return (ARCHIVE_RETRY); #elif (defined(FS_IOC_GETFLAGS) && defined(FS_NODUMP_FL) && \ defined(HAVE_WORKING_FS_IOC_GETFLAGS)) || \ (defined(EXT2_IOC_GETFLAGS) && defined(EXT2_NODUMP_FL) && \ defined(HAVE_WORKING_EXT2_IOC_GETFLAGS)) if (S_ISREG(st->st_mode) || S_ISDIR(st->st_mode)) { int stflags; t->entry_fd = open_on_current_dir(t, tree_current_access_path(t), O_RDONLY | O_NONBLOCK | O_CLOEXEC); __archive_ensure_cloexec_flag(t->entry_fd); if (t->entry_fd >= 0) { r = ioctl(t->entry_fd, #ifdef FS_IOC_GETFLAGS FS_IOC_GETFLAGS, #else EXT2_IOC_GETFLAGS, #endif &stflags); #ifdef FS_NODUMP_FL if (r == 0 && (stflags & FS_NODUMP_FL) != 0) #else if (r == 0 && (stflags & EXT2_NODUMP_FL) != 0) #endif return (ARCHIVE_RETRY); } } #endif } archive_entry_copy_stat(entry, st); /* Save the times to be restored. This must be in before * calling archive_read_disk_descend() or any chance of it, * especially, invoking a callback. */ t->restore_time.mtime = archive_entry_mtime(entry); t->restore_time.mtime_nsec = archive_entry_mtime_nsec(entry); t->restore_time.atime = archive_entry_atime(entry); t->restore_time.atime_nsec = archive_entry_atime_nsec(entry); t->restore_time.filetype = archive_entry_filetype(entry); t->restore_time.noatime = t->current_filesystem->noatime; /* * Perform time matching. */ if (a->matching) { r = archive_match_time_excluded(a->matching, entry); if (r < 0) { archive_set_error(&(a->archive), errno, "Failed : %s", archive_error_string(a->matching)); return (r); } if (r) { if (a->excluded_cb_func) a->excluded_cb_func(&(a->archive), a->excluded_cb_data, entry); return (ARCHIVE_RETRY); } } /* Lookup uname/gname */ name = archive_read_disk_uname(&(a->archive), archive_entry_uid(entry)); if (name != NULL) archive_entry_copy_uname(entry, name); name = archive_read_disk_gname(&(a->archive), archive_entry_gid(entry)); if (name != NULL) archive_entry_copy_gname(entry, name); /* * Perform owner matching. */ if (a->matching) { r = archive_match_owner_excluded(a->matching, entry); if (r < 0) { archive_set_error(&(a->archive), errno, "Failed : %s", archive_error_string(a->matching)); return (r); } if (r) { if (a->excluded_cb_func) a->excluded_cb_func(&(a->archive), a->excluded_cb_data, entry); return (ARCHIVE_RETRY); } } /* * Invoke a meta data filter callback. */ if (a->metadata_filter_func) { if (!a->metadata_filter_func(&(a->archive), a->metadata_filter_data, entry)) return (ARCHIVE_RETRY); } /* * Populate the archive_entry with metadata from the disk. */ archive_entry_copy_sourcepath(entry, tree_current_access_path(t)); r = archive_read_disk_entry_from_file(&(a->archive), entry, t->entry_fd, st); if (r == ARCHIVE_OK) { r = delayed; if (r != ARCHIVE_OK) { archive_string_sprintf(&delayed_str, ": %s", "File removed before we read it"); archive_set_error(&(a->archive), delayed_errno, "%s", delayed_str.s); } } archive_string_free(&delayed_str); return (r); } static int _archive_read_next_header(struct archive *_a, struct archive_entry **entryp) { int ret; struct archive_read_disk *a = (struct archive_read_disk *)_a; *entryp = NULL; ret = _archive_read_next_header2(_a, a->entry); *entryp = a->entry; return ret; } static int _archive_read_next_header2(struct archive *_a, struct archive_entry *entry) { struct archive_read_disk *a = (struct archive_read_disk *)_a; struct tree *t; int r; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, "archive_read_next_header2"); t = a->tree; if (t->entry_fd >= 0) { close_and_restore_time(t->entry_fd, t, &t->restore_time); t->entry_fd = -1; } archive_entry_clear(entry); for (;;) { r = next_entry(a, t, entry); if (t->entry_fd >= 0) { close(t->entry_fd); t->entry_fd = -1; } if (r == ARCHIVE_RETRY) { archive_entry_clear(entry); continue; } break; } /* Return to the initial directory. */ tree_enter_initial_dir(t); /* * EOF and FATAL are persistent at this layer. By * modifying the state, we guarantee that future calls to * read a header or read data will fail. */ switch (r) { case ARCHIVE_EOF: a->archive.state = ARCHIVE_STATE_EOF; break; case ARCHIVE_OK: case ARCHIVE_WARN: /* Overwrite the sourcepath based on the initial directory. */ archive_entry_copy_sourcepath(entry, tree_current_path(t)); t->entry_total = 0; if (archive_entry_filetype(entry) == AE_IFREG) { t->nlink = archive_entry_nlink(entry); t->entry_remaining_bytes = archive_entry_size(entry); t->entry_eof = (t->entry_remaining_bytes == 0)? 1: 0; if (!t->entry_eof && setup_sparse(a, entry) != ARCHIVE_OK) return (ARCHIVE_FATAL); } else { t->entry_remaining_bytes = 0; t->entry_eof = 1; } a->archive.state = ARCHIVE_STATE_DATA; break; case ARCHIVE_RETRY: break; case ARCHIVE_FATAL: a->archive.state = ARCHIVE_STATE_FATAL; break; } __archive_reset_read_data(&a->archive); return (r); } static int setup_sparse(struct archive_read_disk *a, struct archive_entry *entry) { struct tree *t = a->tree; int64_t length, offset; int i; t->sparse_count = archive_entry_sparse_reset(entry); if (t->sparse_count+1 > t->sparse_list_size) { free(t->sparse_list); t->sparse_list_size = t->sparse_count + 1; t->sparse_list = malloc(sizeof(t->sparse_list[0]) * t->sparse_list_size); if (t->sparse_list == NULL) { t->sparse_list_size = 0; archive_set_error(&a->archive, ENOMEM, "Can't allocate data"); a->archive.state = ARCHIVE_STATE_FATAL; return (ARCHIVE_FATAL); } } for (i = 0; i < t->sparse_count; i++) { archive_entry_sparse_next(entry, &offset, &length); t->sparse_list[i].offset = offset; t->sparse_list[i].length = length; } if (i == 0) { t->sparse_list[i].offset = 0; t->sparse_list[i].length = archive_entry_size(entry); } else { t->sparse_list[i].offset = archive_entry_size(entry); t->sparse_list[i].length = 0; } t->current_sparse = t->sparse_list; return (ARCHIVE_OK); } int archive_read_disk_set_matching(struct archive *_a, struct archive *_ma, void (*_excluded_func)(struct archive *, void *, struct archive_entry *), void *_client_data) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_set_matching"); a->matching = _ma; a->excluded_cb_func = _excluded_func; a->excluded_cb_data = _client_data; return (ARCHIVE_OK); } int archive_read_disk_set_metadata_filter_callback(struct archive *_a, int (*_metadata_filter_func)(struct archive *, void *, struct archive_entry *), void *_client_data) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_ANY, "archive_read_disk_set_metadata_filter_callback"); a->metadata_filter_func = _metadata_filter_func; a->metadata_filter_data = _client_data; return (ARCHIVE_OK); } int archive_read_disk_can_descend(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; struct tree *t = a->tree; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, "archive_read_disk_can_descend"); return (t->visit_type == TREE_REGULAR && t->descend); } /* * Called by the client to mark the directory just returned from * tree_next() as needing to be visited. */ int archive_read_disk_descend(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; struct tree *t = a->tree; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, "archive_read_disk_descend"); if (!archive_read_disk_can_descend(_a)) return (ARCHIVE_OK); /* * We must not treat the initial specified path as a physical dir, * because if we do then we will try and ascend out of it by opening * ".." which is (a) wrong and (b) causes spurious permissions errors * if ".." is not readable by us. Instead, treat it as if it were a * symlink. (This uses an extra fd, but it can only happen once at the * top level of a traverse.) But we can't necessarily assume t->st is * valid here (though t->lst is), which complicates the logic a * little. */ if (tree_current_is_physical_dir(t)) { tree_push(t, t->basename, t->current_filesystem_id, t->lst.st_dev, t->lst.st_ino, &t->restore_time); if (t->stack->parent->parent != NULL) t->stack->flags |= isDir; else t->stack->flags |= isDirLink; } else if (tree_current_is_dir(t)) { tree_push(t, t->basename, t->current_filesystem_id, t->st.st_dev, t->st.st_ino, &t->restore_time); t->stack->flags |= isDirLink; } t->descend = 0; return (ARCHIVE_OK); } int archive_read_disk_open(struct archive *_a, const char *pathname) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_NEW | ARCHIVE_STATE_CLOSED, "archive_read_disk_open"); archive_clear_error(&a->archive); return (_archive_read_disk_open(_a, pathname)); } int archive_read_disk_open_w(struct archive *_a, const wchar_t *pathname) { struct archive_read_disk *a = (struct archive_read_disk *)_a; struct archive_string path; int ret; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_NEW | ARCHIVE_STATE_CLOSED, "archive_read_disk_open_w"); archive_clear_error(&a->archive); /* Make a char string from a wchar_t string. */ archive_string_init(&path); if (archive_string_append_from_wcs(&path, pathname, wcslen(pathname)) != 0) { if (errno == ENOMEM) archive_set_error(&a->archive, ENOMEM, "Can't allocate memory"); else archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't convert a path to a char string"); a->archive.state = ARCHIVE_STATE_FATAL; ret = ARCHIVE_FATAL; } else ret = _archive_read_disk_open(_a, path.s); archive_string_free(&path); return (ret); } static int _archive_read_disk_open(struct archive *_a, const char *pathname) { struct archive_read_disk *a = (struct archive_read_disk *)_a; if (a->tree != NULL) a->tree = tree_reopen(a->tree, pathname, a->flags & ARCHIVE_READDISK_RESTORE_ATIME); else a->tree = tree_open(pathname, a->symlink_mode, a->flags & ARCHIVE_READDISK_RESTORE_ATIME); if (a->tree == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate tar data"); a->archive.state = ARCHIVE_STATE_FATAL; return (ARCHIVE_FATAL); } a->archive.state = ARCHIVE_STATE_HEADER; return (ARCHIVE_OK); } /* * Return a current filesystem ID which is index of the filesystem entry * you've visited through archive_read_disk. */ int archive_read_disk_current_filesystem(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA, "archive_read_disk_current_filesystem"); return (a->tree->current_filesystem_id); } static int update_current_filesystem(struct archive_read_disk *a, int64_t dev) { struct tree *t = a->tree; int i, fid; if (t->current_filesystem != NULL && t->current_filesystem->dev == dev) return (ARCHIVE_OK); for (i = 0; i < t->max_filesystem_id; i++) { if (t->filesystem_table[i].dev == dev) { /* There is the filesystem ID we've already generated. */ t->current_filesystem_id = i; t->current_filesystem = &(t->filesystem_table[i]); return (ARCHIVE_OK); } } /* * This is the new filesystem which we have to generate a new ID for. */ fid = t->max_filesystem_id++; if (t->max_filesystem_id > t->allocated_filesystem) { size_t s; void *p; s = t->max_filesystem_id * 2; p = realloc(t->filesystem_table, s * sizeof(*t->filesystem_table)); if (p == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate tar data"); return (ARCHIVE_FATAL); } t->filesystem_table = (struct filesystem *)p; t->allocated_filesystem = s; } t->current_filesystem_id = fid; t->current_filesystem = &(t->filesystem_table[fid]); t->current_filesystem->dev = dev; t->current_filesystem->allocation_ptr = NULL; t->current_filesystem->buff = NULL; /* Setup the current filesystem properties which depend on * platform specific. */ return (setup_current_filesystem(a)); } /* * Returns 1 if current filesystem is generated filesystem, 0 if it is not * or -1 if it is unknown. */ int archive_read_disk_current_filesystem_is_synthetic(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA, "archive_read_disk_current_filesystem"); return (a->tree->current_filesystem->synthetic); } /* * Returns 1 if current filesystem is remote filesystem, 0 if it is not * or -1 if it is unknown. */ int archive_read_disk_current_filesystem_is_remote(struct archive *_a) { struct archive_read_disk *a = (struct archive_read_disk *)_a; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA, "archive_read_disk_current_filesystem"); return (a->tree->current_filesystem->remote); } #if defined(_PC_REC_INCR_XFER_SIZE) && defined(_PC_REC_MAX_XFER_SIZE) &&\ defined(_PC_REC_MIN_XFER_SIZE) && defined(_PC_REC_XFER_ALIGN) static int get_xfer_size(struct tree *t, int fd, const char *path) { t->current_filesystem->xfer_align = -1; errno = 0; if (fd >= 0) { t->current_filesystem->incr_xfer_size = fpathconf(fd, _PC_REC_INCR_XFER_SIZE); t->current_filesystem->max_xfer_size = fpathconf(fd, _PC_REC_MAX_XFER_SIZE); t->current_filesystem->min_xfer_size = fpathconf(fd, _PC_REC_MIN_XFER_SIZE); t->current_filesystem->xfer_align = fpathconf(fd, _PC_REC_XFER_ALIGN); } else if (path != NULL) { t->current_filesystem->incr_xfer_size = pathconf(path, _PC_REC_INCR_XFER_SIZE); t->current_filesystem->max_xfer_size = pathconf(path, _PC_REC_MAX_XFER_SIZE); t->current_filesystem->min_xfer_size = pathconf(path, _PC_REC_MIN_XFER_SIZE); t->current_filesystem->xfer_align = pathconf(path, _PC_REC_XFER_ALIGN); } /* At least we need an alignment size. */ if (t->current_filesystem->xfer_align == -1) return ((errno == EINVAL)?1:-1); else return (0); } #else static int get_xfer_size(struct tree *t, int fd, const char *path) { (void)t; /* UNUSED */ (void)fd; /* UNUSED */ (void)path; /* UNUSED */ return (1);/* Not supported */ } #endif #if defined(HAVE_STATVFS) static inline __LA_UNUSED void set_statvfs_transfer_size(struct filesystem *fs, const struct statvfs *sfs) { fs->xfer_align = sfs->f_frsize > 0 ? (long)sfs->f_frsize : -1; fs->max_xfer_size = -1; #if defined(HAVE_STRUCT_STATVFS_F_IOSIZE) fs->min_xfer_size = sfs->f_iosize > 0 ? (long)sfs->f_iosize : -1; fs->incr_xfer_size = sfs->f_iosize > 0 ? (long)sfs->f_iosize : -1; #else fs->min_xfer_size = sfs->f_bsize > 0 ? (long)sfs->f_bsize : -1; fs->incr_xfer_size = sfs->f_bsize > 0 ? (long)sfs->f_bsize : -1; #endif } #endif #if defined(HAVE_STRUCT_STATFS) static inline __LA_UNUSED void set_statfs_transfer_size(struct filesystem *fs, const struct statfs *sfs) { fs->xfer_align = sfs->f_bsize > 0 ? (long)sfs->f_bsize : -1; fs->max_xfer_size = -1; #if defined(HAVE_STRUCT_STATFS_F_IOSIZE) fs->min_xfer_size = sfs->f_iosize > 0 ? (long)sfs->f_iosize : -1; fs->incr_xfer_size = sfs->f_iosize > 0 ? (long)sfs->f_iosize : -1; #else fs->min_xfer_size = sfs->f_bsize > 0 ? (long)sfs->f_bsize : -1; fs->incr_xfer_size = sfs->f_bsize > 0 ? (long)sfs->f_bsize : -1; #endif } #endif #if defined(HAVE_STRUCT_STATFS) && defined(HAVE_STATFS) && \ defined(HAVE_FSTATFS) && defined(MNT_LOCAL) && !defined(ST_LOCAL) /* * Gather current filesystem properties on FreeBSD, OpenBSD and Mac OS X. */ static int setup_current_filesystem(struct archive_read_disk *a) { struct tree *t = a->tree; struct statfs sfs; #if defined(HAVE_GETVFSBYNAME) && defined(VFCF_SYNTHETIC) /* TODO: configure should set GETVFSBYNAME_ARG_TYPE to make * this accurate; some platforms have both and we need the one that's * used by getvfsbyname() * * Then the following would become: * #if defined(GETVFSBYNAME_ARG_TYPE) * GETVFSBYNAME_ARG_TYPE vfc; * #endif */ # if defined(HAVE_STRUCT_XVFSCONF) struct xvfsconf vfc; # else struct vfsconf vfc; # endif #endif int r, xr = 0; #if !defined(HAVE_STRUCT_STATFS_F_NAMEMAX) long nm; #endif t->current_filesystem->synthetic = -1; t->current_filesystem->remote = -1; if (tree_current_is_symblic_link_target(t)) { #if defined(HAVE_OPENAT) /* * Get file system statistics on any directory * where current is. */ int fd = openat(tree_current_dir_fd(t), tree_current_access_path(t), O_RDONLY | O_CLOEXEC); __archive_ensure_cloexec_flag(fd); if (fd < 0) { archive_set_error(&a->archive, errno, "openat failed"); return (ARCHIVE_FAILED); } r = fstatfs(fd, &sfs); if (r == 0) xr = get_xfer_size(t, fd, NULL); close(fd); #else if (tree_enter_working_dir(t) != 0) { archive_set_error(&a->archive, errno, "fchdir failed"); return (ARCHIVE_FAILED); } r = statfs(tree_current_access_path(t), &sfs); if (r == 0) xr = get_xfer_size(t, -1, tree_current_access_path(t)); #endif } else { r = fstatfs(tree_current_dir_fd(t), &sfs); if (r == 0) xr = get_xfer_size(t, tree_current_dir_fd(t), NULL); } if (r == -1 || xr == -1) { archive_set_error(&a->archive, errno, "statfs failed"); return (ARCHIVE_FAILED); } else if (xr == 1) { /* pathconf(_PC_REX_*) operations are not supported. */ set_statfs_transfer_size(t->current_filesystem, &sfs); } if (sfs.f_flags & MNT_LOCAL) t->current_filesystem->remote = 0; else t->current_filesystem->remote = 1; #if defined(HAVE_GETVFSBYNAME) && defined(VFCF_SYNTHETIC) r = getvfsbyname(sfs.f_fstypename, &vfc); if (r == -1) { archive_set_error(&a->archive, errno, "getvfsbyname failed"); return (ARCHIVE_FAILED); } if (vfc.vfc_flags & VFCF_SYNTHETIC) t->current_filesystem->synthetic = 1; else t->current_filesystem->synthetic = 0; #endif #if defined(MNT_NOATIME) if (sfs.f_flags & MNT_NOATIME) t->current_filesystem->noatime = 1; else #endif t->current_filesystem->noatime = 0; #if defined(USE_READDIR_R) /* Set maximum filename length. */ #if defined(HAVE_STRUCT_STATFS_F_NAMEMAX) t->current_filesystem->name_max = sfs.f_namemax; #else # if defined(_PC_NAME_MAX) /* Mac OS X does not have f_namemax in struct statfs. */ if (tree_current_is_symblic_link_target(t)) { if (tree_enter_working_dir(t) != 0) { archive_set_error(&a->archive, errno, "fchdir failed"); return (ARCHIVE_FAILED); } nm = pathconf(tree_current_access_path(t), _PC_NAME_MAX); } else nm = fpathconf(tree_current_dir_fd(t), _PC_NAME_MAX); # else nm = -1; # endif if (nm == -1) t->current_filesystem->name_max = NAME_MAX; else t->current_filesystem->name_max = nm; #endif #endif /* USE_READDIR_R */ return (ARCHIVE_OK); } #elif (defined(HAVE_STATVFS) || defined(HAVE_FSTATVFS)) && defined(ST_LOCAL) /* * Gather current filesystem properties on NetBSD */ static int setup_current_filesystem(struct archive_read_disk *a) { struct tree *t = a->tree; struct statvfs svfs; int r, xr = 0; t->current_filesystem->synthetic = -1; if (tree_enter_working_dir(t) != 0) { archive_set_error(&a->archive, errno, "fchdir failed"); return (ARCHIVE_FAILED); } if (tree_current_is_symblic_link_target(t)) { r = statvfs(tree_current_access_path(t), &svfs); if (r == 0) xr = get_xfer_size(t, -1, tree_current_access_path(t)); } else { #ifdef HAVE_FSTATVFS r = fstatvfs(tree_current_dir_fd(t), &svfs); if (r == 0) xr = get_xfer_size(t, tree_current_dir_fd(t), NULL); #else r = statvfs(".", &svfs); if (r == 0) xr = get_xfer_size(t, -1, "."); #endif } if (r == -1 || xr == -1) { t->current_filesystem->remote = -1; archive_set_error(&a->archive, errno, "statvfs failed"); return (ARCHIVE_FAILED); } else if (xr == 1) { /* Usually come here unless NetBSD supports _PC_REC_XFER_ALIGN * for pathconf() function. */ set_statvfs_transfer_size(t->current_filesystem, &svfs); } if (svfs.f_flag & ST_LOCAL) t->current_filesystem->remote = 0; else t->current_filesystem->remote = 1; #if defined(ST_NOATIME) if (svfs.f_flag & ST_NOATIME) t->current_filesystem->noatime = 1; else #endif t->current_filesystem->noatime = 0; /* Set maximum filename length. */ t->current_filesystem->name_max = svfs.f_namemax; return (ARCHIVE_OK); } #elif defined(HAVE_SYS_STATFS_H) && defined(HAVE_LINUX_MAGIC_H) &&\ defined(HAVE_STATFS) && defined(HAVE_FSTATFS) /* * Note: statfs is deprecated since LSB 3.2 */ #ifndef CIFS_SUPER_MAGIC #define CIFS_SUPER_MAGIC 0xFF534D42 #endif #ifndef DEVFS_SUPER_MAGIC #define DEVFS_SUPER_MAGIC 0x1373 #endif /* * Gather current filesystem properties on Linux */ static int setup_current_filesystem(struct archive_read_disk *a) { struct tree *t = a->tree; struct statfs sfs; #if defined(HAVE_STATVFS) struct statvfs svfs; #endif int r, vr = 0, xr = 0; if (tree_current_is_symblic_link_target(t)) { #if defined(HAVE_OPENAT) /* * Get file system statistics on any directory * where current is. */ int fd = openat(tree_current_dir_fd(t), tree_current_access_path(t), O_RDONLY | O_CLOEXEC); __archive_ensure_cloexec_flag(fd); if (fd < 0) { archive_set_error(&a->archive, errno, "openat failed"); return (ARCHIVE_FAILED); } #if defined(HAVE_FSTATVFS) vr = fstatvfs(fd, &svfs);/* for f_flag, mount flags */ #endif r = fstatfs(fd, &sfs); if (r == 0) xr = get_xfer_size(t, fd, NULL); close(fd); #else if (tree_enter_working_dir(t) != 0) { archive_set_error(&a->archive, errno, "fchdir failed"); return (ARCHIVE_FAILED); } #if defined(HAVE_STATVFS) vr = statvfs(tree_current_access_path(t), &svfs); #endif r = statfs(tree_current_access_path(t), &sfs); if (r == 0) xr = get_xfer_size(t, -1, tree_current_access_path(t)); #endif } else { #ifdef HAVE_FSTATFS #if defined(HAVE_FSTATVFS) vr = fstatvfs(tree_current_dir_fd(t), &svfs); #endif r = fstatfs(tree_current_dir_fd(t), &sfs); if (r == 0) xr = get_xfer_size(t, tree_current_dir_fd(t), NULL); #else if (tree_enter_working_dir(t) != 0) { archive_set_error(&a->archive, errno, "fchdir failed"); return (ARCHIVE_FAILED); } #if defined(HAVE_STATVFS) vr = statvfs(".", &svfs); #endif r = statfs(".", &sfs); if (r == 0) xr = get_xfer_size(t, -1, "."); #endif } if (r == -1 || xr == -1 || vr == -1) { t->current_filesystem->synthetic = -1; t->current_filesystem->remote = -1; archive_set_error(&a->archive, errno, "statfs failed"); return (ARCHIVE_FAILED); } else if (xr == 1) { /* pathconf(_PC_REX_*) operations are not supported. */ #if defined(HAVE_STATVFS) set_statvfs_transfer_size(t->current_filesystem, &svfs); #else set_statfs_transfer_size(t->current_filesystem, &sfs); #endif } switch (sfs.f_type) { case AFS_SUPER_MAGIC: case CIFS_SUPER_MAGIC: case CODA_SUPER_MAGIC: case NCP_SUPER_MAGIC:/* NetWare */ case NFS_SUPER_MAGIC: case SMB_SUPER_MAGIC: t->current_filesystem->remote = 1; t->current_filesystem->synthetic = 0; break; case DEVFS_SUPER_MAGIC: case PROC_SUPER_MAGIC: case USBDEVICE_SUPER_MAGIC: t->current_filesystem->remote = 0; t->current_filesystem->synthetic = 1; break; default: t->current_filesystem->remote = 0; t->current_filesystem->synthetic = 0; break; } #if defined(ST_NOATIME) #if defined(HAVE_STATVFS) if (svfs.f_flag & ST_NOATIME) #else if (sfs.f_flags & ST_NOATIME) #endif t->current_filesystem->noatime = 1; else #endif t->current_filesystem->noatime = 0; #if defined(USE_READDIR_R) /* Set maximum filename length. */ t->current_filesystem->name_max = sfs.f_namelen; #endif return (ARCHIVE_OK); } #elif defined(HAVE_SYS_STATVFS_H) &&\ (defined(HAVE_STATVFS) || defined(HAVE_FSTATVFS)) /* * Gather current filesystem properties on other posix platform. */ static int setup_current_filesystem(struct archive_read_disk *a) { struct tree *t = a->tree; struct statvfs svfs; int r, xr = 0; t->current_filesystem->synthetic = -1;/* Not supported */ t->current_filesystem->remote = -1;/* Not supported */ if (tree_current_is_symblic_link_target(t)) { #if defined(HAVE_OPENAT) /* * Get file system statistics on any directory * where current is. */ int fd = openat(tree_current_dir_fd(t), tree_current_access_path(t), O_RDONLY | O_CLOEXEC); __archive_ensure_cloexec_flag(fd); if (fd < 0) { archive_set_error(&a->archive, errno, "openat failed"); return (ARCHIVE_FAILED); } r = fstatvfs(fd, &svfs); if (r == 0) xr = get_xfer_size(t, fd, NULL); close(fd); #else if (tree_enter_working_dir(t) != 0) { archive_set_error(&a->archive, errno, "fchdir failed"); return (ARCHIVE_FAILED); } r = statvfs(tree_current_access_path(t), &svfs); if (r == 0) xr = get_xfer_size(t, -1, tree_current_access_path(t)); #endif } else { #ifdef HAVE_FSTATVFS r = fstatvfs(tree_current_dir_fd(t), &svfs); if (r == 0) xr = get_xfer_size(t, tree_current_dir_fd(t), NULL); #else if (tree_enter_working_dir(t) != 0) { archive_set_error(&a->archive, errno, "fchdir failed"); return (ARCHIVE_FAILED); } r = statvfs(".", &svfs); if (r == 0) xr = get_xfer_size(t, -1, "."); #endif } if (r == -1 || xr == -1) { t->current_filesystem->synthetic = -1; t->current_filesystem->remote = -1; archive_set_error(&a->archive, errno, "statvfs failed"); return (ARCHIVE_FAILED); } else if (xr == 1) { /* pathconf(_PC_REX_*) operations are not supported. */ set_statvfs_transfer_size(t->current_filesystem, &svfs); } #if defined(ST_NOATIME) if (svfs.f_flag & ST_NOATIME) t->current_filesystem->noatime = 1; else #endif t->current_filesystem->noatime = 0; #if defined(USE_READDIR_R) /* Set maximum filename length. */ t->current_filesystem->name_max = svfs.f_namemax; #endif return (ARCHIVE_OK); } #else /* * Generic: Gather current filesystem properties. * TODO: Is this generic function really needed? */ static int setup_current_filesystem(struct archive_read_disk *a) { struct tree *t = a->tree; #if defined(_PC_NAME_MAX) && defined(USE_READDIR_R) long nm; #endif t->current_filesystem->synthetic = -1;/* Not supported */ t->current_filesystem->remote = -1;/* Not supported */ t->current_filesystem->noatime = 0; (void)get_xfer_size(t, -1, ".");/* Dummy call to avoid build error. */ t->current_filesystem->xfer_align = -1;/* Unknown */ t->current_filesystem->max_xfer_size = -1; t->current_filesystem->min_xfer_size = -1; t->current_filesystem->incr_xfer_size = -1; #if defined(USE_READDIR_R) /* Set maximum filename length. */ # if defined(_PC_NAME_MAX) if (tree_current_is_symblic_link_target(t)) { if (tree_enter_working_dir(t) != 0) { archive_set_error(&a->archive, errno, "fchdir failed"); return (ARCHIVE_FAILED); } nm = pathconf(tree_current_access_path(t), _PC_NAME_MAX); } else nm = fpathconf(tree_current_dir_fd(t), _PC_NAME_MAX); if (nm == -1) # endif /* _PC_NAME_MAX */ /* * Some systems (HP-UX or others?) incorrectly defined * NAME_MAX macro to be a smaller value. */ # if defined(NAME_MAX) && NAME_MAX >= 255 t->current_filesystem->name_max = NAME_MAX; # else /* No way to get a trusted value of maximum filename * length. */ t->current_filesystem->name_max = PATH_MAX; # endif /* NAME_MAX */ # if defined(_PC_NAME_MAX) else t->current_filesystem->name_max = nm; # endif /* _PC_NAME_MAX */ #endif /* USE_READDIR_R */ return (ARCHIVE_OK); } #endif static int close_and_restore_time(int fd, struct tree *t, struct restore_time *rt) { #ifndef HAVE_UTIMES (void)t; /* UNUSED */ (void)rt; /* UNUSED */ return (close(fd)); #else #if defined(HAVE_FUTIMENS) && !defined(__CYGWIN__) struct timespec timespecs[2]; #endif struct timeval times[2]; if ((t->flags & needsRestoreTimes) == 0 || rt->noatime) { if (fd >= 0) return (close(fd)); else return (0); } #if defined(HAVE_FUTIMENS) && !defined(__CYGWIN__) timespecs[1].tv_sec = rt->mtime; timespecs[1].tv_nsec = rt->mtime_nsec; timespecs[0].tv_sec = rt->atime; timespecs[0].tv_nsec = rt->atime_nsec; /* futimens() is defined in POSIX.1-2008. */ if (futimens(fd, timespecs) == 0) return (close(fd)); #endif times[1].tv_sec = rt->mtime; times[1].tv_usec = rt->mtime_nsec / 1000; times[0].tv_sec = rt->atime; times[0].tv_usec = rt->atime_nsec / 1000; #if !defined(HAVE_FUTIMENS) && defined(HAVE_FUTIMES) && !defined(__CYGWIN__) if (futimes(fd, times) == 0) return (close(fd)); #endif close(fd); #if defined(HAVE_FUTIMESAT) if (futimesat(tree_current_dir_fd(t), rt->name, times) == 0) return (0); #endif #ifdef HAVE_LUTIMES if (lutimes(rt->name, times) != 0) #else if (AE_IFLNK != rt->filetype && utimes(rt->name, times) != 0) #endif return (-1); #endif return (0); } static int open_on_current_dir(struct tree *t, const char *path, int flags) { #ifdef HAVE_OPENAT return (openat(tree_current_dir_fd(t), path, flags)); #else if (tree_enter_working_dir(t) != 0) return (-1); return (open(path, flags)); #endif } static int tree_dup(int fd) { int new_fd; #ifdef F_DUPFD_CLOEXEC static volatile int can_dupfd_cloexec = 1; if (can_dupfd_cloexec) { new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 0); if (new_fd != -1) return (new_fd); /* Linux 2.6.18 - 2.6.23 declare F_DUPFD_CLOEXEC, * but it cannot be used. So we have to try dup(). */ /* We won't try F_DUPFD_CLOEXEC. */ can_dupfd_cloexec = 0; } #endif /* F_DUPFD_CLOEXEC */ new_fd = dup(fd); __archive_ensure_cloexec_flag(new_fd); return (new_fd); } /* * Add a directory path to the current stack. */ static void tree_push(struct tree *t, const char *path, int filesystem_id, int64_t dev, int64_t ino, struct restore_time *rt) { struct tree_entry *te; te = calloc(1, sizeof(*te)); te->next = t->stack; te->parent = t->current; if (te->parent) te->depth = te->parent->depth + 1; t->stack = te; archive_string_init(&te->name); te->symlink_parent_fd = -1; archive_strcpy(&te->name, path); te->flags = needsDescent | needsOpen | needsAscent; te->filesystem_id = filesystem_id; te->dev = dev; te->ino = ino; te->dirname_length = t->dirname_length; te->restore_time.name = te->name.s; if (rt != NULL) { te->restore_time.mtime = rt->mtime; te->restore_time.mtime_nsec = rt->mtime_nsec; te->restore_time.atime = rt->atime; te->restore_time.atime_nsec = rt->atime_nsec; te->restore_time.filetype = rt->filetype; te->restore_time.noatime = rt->noatime; } } /* * Append a name to the current dir path. */ static void tree_append(struct tree *t, const char *name, size_t name_length) { size_t size_needed; t->path.s[t->dirname_length] = '\0'; t->path.length = t->dirname_length; /* Strip trailing '/' from name, unless entire name is "/". */ while (name_length > 1 && name[name_length - 1] == '/') name_length--; /* Resize pathname buffer as needed. */ size_needed = name_length + t->dirname_length + 2; archive_string_ensure(&t->path, size_needed); /* Add a separating '/' if it's needed. */ if (t->dirname_length > 0 && t->path.s[archive_strlen(&t->path)-1] != '/') archive_strappend_char(&t->path, '/'); t->basename = t->path.s + archive_strlen(&t->path); archive_strncat(&t->path, name, name_length); t->restore_time.name = t->basename; } /* * Open a directory tree for traversal. */ static struct tree * tree_open(const char *path, int symlink_mode, int restore_time) { struct tree *t; if ((t = calloc(1, sizeof(*t))) == NULL) return (NULL); archive_string_init(&t->path); archive_string_ensure(&t->path, 31); t->initial_symlink_mode = symlink_mode; return (tree_reopen(t, path, restore_time)); } static struct tree * tree_reopen(struct tree *t, const char *path, int restore_time) { #if defined(O_PATH) /* Linux */ const int o_flag = O_PATH; #elif defined(O_SEARCH) /* SunOS */ const int o_flag = O_SEARCH; #elif defined(__FreeBSD__) && defined(O_EXEC) /* FreeBSD */ const int o_flag = O_EXEC; #endif t->flags = (restore_time != 0)?needsRestoreTimes:0; t->flags |= onInitialDir; t->visit_type = 0; t->tree_errno = 0; t->dirname_length = 0; t->depth = 0; t->descend = 0; t->current = NULL; t->d = INVALID_DIR_HANDLE; t->symlink_mode = t->initial_symlink_mode; archive_string_empty(&t->path); t->entry_fd = -1; t->entry_eof = 0; t->entry_remaining_bytes = 0; t->initial_filesystem_id = -1; /* First item is set up a lot like a symlink traversal. */ tree_push(t, path, 0, 0, 0, NULL); t->stack->flags = needsFirstVisit; t->maxOpenCount = t->openCount = 1; t->initial_dir_fd = open(".", O_RDONLY | O_CLOEXEC); #if defined(O_PATH) || defined(O_SEARCH) || \ (defined(__FreeBSD__) && defined(O_EXEC)) /* * Most likely reason to fail opening "." is that it's not readable, * so try again for execute. The consequences of not opening this are * unhelpful and unnecessary errors later. */ if (t->initial_dir_fd < 0) t->initial_dir_fd = open(".", o_flag | O_CLOEXEC); #endif __archive_ensure_cloexec_flag(t->initial_dir_fd); t->working_dir_fd = tree_dup(t->initial_dir_fd); return (t); } static int tree_descent(struct tree *t) { int flag, new_fd, r = 0; t->dirname_length = archive_strlen(&t->path); flag = O_RDONLY | O_CLOEXEC; #if defined(O_DIRECTORY) flag |= O_DIRECTORY; #endif new_fd = open_on_current_dir(t, t->stack->name.s, flag); __archive_ensure_cloexec_flag(new_fd); if (new_fd < 0) { t->tree_errno = errno; r = TREE_ERROR_DIR; } else { t->depth++; /* If it is a link, set up fd for the ascent. */ if (t->stack->flags & isDirLink) { t->stack->symlink_parent_fd = t->working_dir_fd; t->openCount++; if (t->openCount > t->maxOpenCount) t->maxOpenCount = t->openCount; } else close(t->working_dir_fd); /* Renew the current working directory. */ t->working_dir_fd = new_fd; t->flags &= ~onWorkingDir; } return (r); } /* * We've finished a directory; ascend back to the parent. */ static int tree_ascend(struct tree *t) { struct tree_entry *te; int new_fd, r = 0, prev_dir_fd; te = t->stack; prev_dir_fd = t->working_dir_fd; if (te->flags & isDirLink) new_fd = te->symlink_parent_fd; else { new_fd = open_on_current_dir(t, "..", O_RDONLY | O_CLOEXEC); __archive_ensure_cloexec_flag(new_fd); } if (new_fd < 0) { t->tree_errno = errno; r = TREE_ERROR_FATAL; } else { /* Renew the current working directory. */ t->working_dir_fd = new_fd; t->flags &= ~onWorkingDir; /* Current directory has been changed, we should * close an fd of previous working directory. */ close_and_restore_time(prev_dir_fd, t, &te->restore_time); if (te->flags & isDirLink) { t->openCount--; te->symlink_parent_fd = -1; } t->depth--; } return (r); } /* * Return to the initial directory where tree_open() was performed. */ static int tree_enter_initial_dir(struct tree *t) { int r = 0; if ((t->flags & onInitialDir) == 0) { r = fchdir(t->initial_dir_fd); if (r == 0) { t->flags &= ~onWorkingDir; t->flags |= onInitialDir; } } return (r); } /* * Restore working directory of directory traversals. */ static int tree_enter_working_dir(struct tree *t) { int r = 0; /* * Change the current directory if really needed. * Sometimes this is unneeded when we did not do * descent. */ if (t->depth > 0 && (t->flags & onWorkingDir) == 0) { r = fchdir(t->working_dir_fd); if (r == 0) { t->flags &= ~onInitialDir; t->flags |= onWorkingDir; } } return (r); } static int tree_current_dir_fd(struct tree *t) { return (t->working_dir_fd); } /* * Pop the working stack. */ static void tree_pop(struct tree *t) { struct tree_entry *te; t->path.s[t->dirname_length] = '\0'; t->path.length = t->dirname_length; if (t->stack == t->current && t->current != NULL) t->current = t->current->parent; te = t->stack; t->stack = te->next; t->dirname_length = te->dirname_length; t->basename = t->path.s + t->dirname_length; while (t->basename[0] == '/') t->basename++; archive_string_free(&te->name); free(te); } /* * Get the next item in the tree traversal. */ static int tree_next(struct tree *t) { int r; while (t->stack != NULL) { /* If there's an open dir, get the next entry from there. */ if (t->d != INVALID_DIR_HANDLE) { r = tree_dir_next_posix(t); if (r == 0) continue; return (r); } if (t->stack->flags & needsFirstVisit) { /* Top stack item needs a regular visit. */ t->current = t->stack; tree_append(t, t->stack->name.s, archive_strlen(&(t->stack->name))); /* t->dirname_length = t->path_length; */ /* tree_pop(t); */ t->stack->flags &= ~needsFirstVisit; return (t->visit_type = TREE_REGULAR); } else if (t->stack->flags & needsDescent) { /* Top stack item is dir to descend into. */ t->current = t->stack; tree_append(t, t->stack->name.s, archive_strlen(&(t->stack->name))); t->stack->flags &= ~needsDescent; r = tree_descent(t); if (r != 0) { tree_pop(t); t->visit_type = r; } else t->visit_type = TREE_POSTDESCENT; return (t->visit_type); } else if (t->stack->flags & needsOpen) { t->stack->flags &= ~needsOpen; r = tree_dir_next_posix(t); if (r == 0) continue; return (r); } else if (t->stack->flags & needsAscent) { /* Top stack item is dir and we're done with it. */ r = tree_ascend(t); tree_pop(t); t->visit_type = r != 0 ? r : TREE_POSTASCENT; return (t->visit_type); } else { /* Top item on stack is dead. */ tree_pop(t); t->flags &= ~hasLstat; t->flags &= ~hasStat; } } return (t->visit_type = 0); } static int tree_dir_next_posix(struct tree *t) { int r; const char *name; size_t namelen; if (t->d == NULL) { #if defined(USE_READDIR_R) size_t dirent_size; #endif #if defined(HAVE_FDOPENDIR) t->d = fdopendir(tree_dup(t->working_dir_fd)); #else /* HAVE_FDOPENDIR */ if (tree_enter_working_dir(t) == 0) { t->d = opendir("."); -#if HAVE_DIRFD || defined(dirfd) +#ifdef HAVE_DIRFD __archive_ensure_cloexec_flag(dirfd(t->d)); #endif } #endif /* HAVE_FDOPENDIR */ if (t->d == NULL) { r = tree_ascend(t); /* Undo "chdir" */ tree_pop(t); t->tree_errno = errno; t->visit_type = r != 0 ? r : TREE_ERROR_DIR; return (t->visit_type); } #if defined(USE_READDIR_R) dirent_size = offsetof(struct dirent, d_name) + t->filesystem_table[t->current->filesystem_id].name_max + 1; if (t->dirent == NULL || t->dirent_allocated < dirent_size) { free(t->dirent); t->dirent = malloc(dirent_size); if (t->dirent == NULL) { closedir(t->d); t->d = INVALID_DIR_HANDLE; (void)tree_ascend(t); tree_pop(t); t->tree_errno = ENOMEM; t->visit_type = TREE_ERROR_DIR; return (t->visit_type); } t->dirent_allocated = dirent_size; } #endif /* USE_READDIR_R */ } for (;;) { errno = 0; #if defined(USE_READDIR_R) r = readdir_r(t->d, t->dirent, &t->de); #ifdef _AIX /* Note: According to the man page, return value 9 indicates * that the readdir_r was not successful and the error code * is set to the global errno variable. And then if the end * of directory entries was reached, the return value is 9 * and the third parameter is set to NULL and errno is * unchanged. */ if (r == 9) r = errno; #endif /* _AIX */ if (r != 0 || t->de == NULL) { #else t->de = readdir(t->d); if (t->de == NULL) { r = errno; #endif closedir(t->d); t->d = INVALID_DIR_HANDLE; if (r != 0) { t->tree_errno = r; t->visit_type = TREE_ERROR_DIR; return (t->visit_type); } else return (0); } name = t->de->d_name; namelen = D_NAMELEN(t->de); t->flags &= ~hasLstat; t->flags &= ~hasStat; if (name[0] == '.' && name[1] == '\0') continue; if (name[0] == '.' && name[1] == '.' && name[2] == '\0') continue; tree_append(t, name, namelen); return (t->visit_type = TREE_REGULAR); } } /* * Get the stat() data for the entry just returned from tree_next(). */ static const struct stat * tree_current_stat(struct tree *t) { if (!(t->flags & hasStat)) { #ifdef HAVE_FSTATAT if (fstatat(tree_current_dir_fd(t), tree_current_access_path(t), &t->st, 0) != 0) #else if (tree_enter_working_dir(t) != 0) return NULL; if (la_stat(tree_current_access_path(t), &t->st) != 0) #endif return NULL; t->flags |= hasStat; } return (&t->st); } /* * Get the lstat() data for the entry just returned from tree_next(). */ static const struct stat * tree_current_lstat(struct tree *t) { if (!(t->flags & hasLstat)) { #ifdef HAVE_FSTATAT if (fstatat(tree_current_dir_fd(t), tree_current_access_path(t), &t->lst, AT_SYMLINK_NOFOLLOW) != 0) #else if (tree_enter_working_dir(t) != 0) return NULL; if (lstat(tree_current_access_path(t), &t->lst) != 0) #endif return NULL; t->flags |= hasLstat; } return (&t->lst); } /* * Test whether current entry is a dir or link to a dir. */ static int tree_current_is_dir(struct tree *t) { const struct stat *st; /* * If we already have lstat() info, then try some * cheap tests to determine if this is a dir. */ if (t->flags & hasLstat) { /* If lstat() says it's a dir, it must be a dir. */ st = tree_current_lstat(t); if (st == NULL) return 0; if (S_ISDIR(st->st_mode)) return 1; /* Not a dir; might be a link to a dir. */ /* If it's not a link, then it's not a link to a dir. */ if (!S_ISLNK(st->st_mode)) return 0; /* * It's a link, but we don't know what it's a link to, * so we'll have to use stat(). */ } st = tree_current_stat(t); /* If we can't stat it, it's not a dir. */ if (st == NULL) return 0; /* Use the definitive test. Hopefully this is cached. */ return (S_ISDIR(st->st_mode)); } /* * Test whether current entry is a physical directory. Usually, we * already have at least one of stat() or lstat() in memory, so we * use tricks to try to avoid an extra trip to the disk. */ static int tree_current_is_physical_dir(struct tree *t) { const struct stat *st; /* * If stat() says it isn't a dir, then it's not a dir. * If stat() data is cached, this check is free, so do it first. */ if (t->flags & hasStat) { st = tree_current_stat(t); if (st == NULL) return (0); if (!S_ISDIR(st->st_mode)) return (0); } /* * Either stat() said it was a dir (in which case, we have * to determine whether it's really a link to a dir) or * stat() info wasn't available. So we use lstat(), which * hopefully is already cached. */ st = tree_current_lstat(t); /* If we can't stat it, it's not a dir. */ if (st == NULL) return 0; /* Use the definitive test. Hopefully this is cached. */ return (S_ISDIR(st->st_mode)); } /* * Test whether the same file has been in the tree as its parent. */ static int tree_target_is_same_as_parent(struct tree *t, const struct stat *st) { struct tree_entry *te; for (te = t->current->parent; te != NULL; te = te->parent) { if (te->dev == (int64_t)st->st_dev && te->ino == (int64_t)st->st_ino) return (1); } return (0); } /* * Test whether the current file is symbolic link target and * on the other filesystem. */ static int tree_current_is_symblic_link_target(struct tree *t) { static const struct stat *lst, *st; lst = tree_current_lstat(t); st = tree_current_stat(t); return (st != NULL && lst != NULL && (int64_t)st->st_dev == t->current_filesystem->dev && st->st_dev != lst->st_dev); } /* * Return the access path for the entry just returned from tree_next(). */ static const char * tree_current_access_path(struct tree *t) { return (t->basename); } /* * Return the full path for the entry just returned from tree_next(). */ static const char * tree_current_path(struct tree *t) { return (t->path.s); } /* * Terminate the traversal. */ static void tree_close(struct tree *t) { if (t == NULL) return; if (t->entry_fd >= 0) { close_and_restore_time(t->entry_fd, t, &t->restore_time); t->entry_fd = -1; } /* Close the handle of readdir(). */ if (t->d != INVALID_DIR_HANDLE) { closedir(t->d); t->d = INVALID_DIR_HANDLE; } /* Release anything remaining in the stack. */ while (t->stack != NULL) { if (t->stack->flags & isDirLink) close(t->stack->symlink_parent_fd); tree_pop(t); } if (t->working_dir_fd >= 0) { close(t->working_dir_fd); t->working_dir_fd = -1; } if (t->initial_dir_fd >= 0) { close(t->initial_dir_fd); t->initial_dir_fd = -1; } } /* * Release any resources. */ static void tree_free(struct tree *t) { int i; if (t == NULL) return; archive_string_free(&t->path); #if defined(USE_READDIR_R) free(t->dirent); #endif free(t->sparse_list); for (i = 0; i < t->max_filesystem_id; i++) free(t->filesystem_table[i].allocation_ptr); free(t->filesystem_table); free(t); } #endif diff --git a/contrib/libarchive/libarchive/archive_read_support_format_mtree.c b/contrib/libarchive/libarchive/archive_read_support_format_mtree.c index bfcec220285d..96eb133ed361 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_mtree.c +++ b/contrib/libarchive/libarchive/archive_read_support_format_mtree.c @@ -1,2143 +1,2140 @@ /*- * Copyright (c) 2003-2007 Tim Kientzle * Copyright (c) 2008 Joerg Sonnenberger * Copyright (c) 2011-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_FCNTL_H #include #endif #include /* #include */ /* See archive_platform.h */ #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_CTYPE_H #include #endif #include "archive.h" #include "archive_entry.h" #include "archive_entry_private.h" #include "archive_private.h" #include "archive_rb.h" #include "archive_read_private.h" #include "archive_string.h" #include "archive_pack_dev.h" #ifndef O_BINARY #define O_BINARY 0 #endif #ifndef O_CLOEXEC #define O_CLOEXEC 0 #endif #define MTREE_HAS_DEVICE 0x0001 #define MTREE_HAS_FFLAGS 0x0002 #define MTREE_HAS_GID 0x0004 #define MTREE_HAS_GNAME 0x0008 #define MTREE_HAS_MTIME 0x0010 #define MTREE_HAS_NLINK 0x0020 #define MTREE_HAS_PERM 0x0040 #define MTREE_HAS_SIZE 0x0080 #define MTREE_HAS_TYPE 0x0100 #define MTREE_HAS_UID 0x0200 #define MTREE_HAS_UNAME 0x0400 #define MTREE_HAS_OPTIONAL 0x0800 #define MTREE_HAS_NOCHANGE 0x1000 /* FreeBSD specific */ #define MAX_LINE_LEN (1024 * 1024) struct mtree_option { struct mtree_option *next; char *value; }; struct mtree_entry { struct archive_rb_node rbnode; struct mtree_entry *next_dup; struct mtree_entry *next; struct mtree_option *options; char *name; char full; char used; }; struct mtree { struct archive_string line; size_t buffsize; char *buff; int64_t offset; int fd; int archive_format; const char *archive_format_name; struct mtree_entry *entries; struct mtree_entry *this_entry; struct archive_rb_tree entry_rbtree; struct archive_string current_dir; struct archive_string contents_name; struct archive_entry_linkresolver *resolver; struct archive_rb_tree rbtree; int64_t cur_size; char checkfs; }; static int bid_keycmp(const char *, const char *, ssize_t); static int cleanup(struct archive_read *); static int detect_form(struct archive_read *, int *); static int mtree_bid(struct archive_read *, int); static int parse_file(struct archive_read *, struct archive_entry *, struct mtree *, struct mtree_entry *, int *); static void parse_escapes(char *, struct mtree_entry *); static int parse_line(struct archive_read *, struct archive_entry *, struct mtree *, struct mtree_entry *, int *); static int parse_keyword(struct archive_read *, struct mtree *, struct archive_entry *, struct mtree_option *, int *); static int read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset); static ssize_t readline(struct archive_read *, struct mtree *, char **, ssize_t); static int skip(struct archive_read *a); static int read_header(struct archive_read *, struct archive_entry *); static int64_t mtree_atol(char **, int base); #ifndef HAVE_STRNLEN static size_t mtree_strnlen(const char *, size_t); #endif /* * There's no standard for TIME_T_MAX/TIME_T_MIN. So we compute them * here. TODO: Move this to configure time, but be careful * about cross-compile environments. */ static int64_t get_time_t_max(void) { #if defined(TIME_T_MAX) return TIME_T_MAX; #else /* ISO C allows time_t to be a floating-point type, but POSIX requires an integer type. The following should work on any system that follows the POSIX conventions. */ if (((time_t)0) < ((time_t)-1)) { /* Time_t is unsigned */ return (~(time_t)0); } else { /* Time_t is signed. */ /* Assume it's the same as int64_t or int32_t */ if (sizeof(time_t) == sizeof(int64_t)) { return (time_t)INT64_MAX; } else { return (time_t)INT32_MAX; } } #endif } static int64_t get_time_t_min(void) { #if defined(TIME_T_MIN) return TIME_T_MIN; #else if (((time_t)0) < ((time_t)-1)) { /* Time_t is unsigned */ return (time_t)0; } else { /* Time_t is signed. */ if (sizeof(time_t) == sizeof(int64_t)) { return (time_t)INT64_MIN; } else { return (time_t)INT32_MIN; } } #endif } #ifdef HAVE_STRNLEN #define mtree_strnlen(a,b) strnlen(a,b) #else static size_t mtree_strnlen(const char *p, size_t maxlen) { size_t i; for (i = 0; i <= maxlen; i++) { if (p[i] == 0) break; } if (i > maxlen) return (-1);/* invalid */ return (i); } #endif static int archive_read_format_mtree_options(struct archive_read *a, const char *key, const char *val) { struct mtree *mtree; mtree = (struct mtree *)(a->format->data); if (strcmp(key, "checkfs") == 0) { /* Allows to read information missing from the mtree from the file system */ if (val == NULL || val[0] == 0) { mtree->checkfs = 0; } else { mtree->checkfs = 1; } return (ARCHIVE_OK); } /* Note: The "warn" return is just to inform the options * supervisor that we didn't handle it. It will generate * a suitable error if no one used this option. */ return (ARCHIVE_WARN); } static void free_options(struct mtree_option *head) { struct mtree_option *next; for (; head != NULL; head = next) { next = head->next; free(head->value); free(head); } } static int mtree_cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2) { const struct mtree_entry *e1 = (const struct mtree_entry *)n1; const struct mtree_entry *e2 = (const struct mtree_entry *)n2; return (strcmp(e1->name, e2->name)); } static int mtree_cmp_key(const struct archive_rb_node *n, const void *key) { const struct mtree_entry *e = (const struct mtree_entry *)n; return (strcmp(e->name, key)); } int archive_read_support_format_mtree(struct archive *_a) { static const struct archive_rb_tree_ops rb_ops = { mtree_cmp_node, mtree_cmp_key, }; struct archive_read *a = (struct archive_read *)_a; struct mtree *mtree; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_mtree"); mtree = (struct mtree *)calloc(1, sizeof(*mtree)); if (mtree == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate mtree data"); return (ARCHIVE_FATAL); } mtree->checkfs = 0; mtree->fd = -1; __archive_rb_tree_init(&mtree->rbtree, &rb_ops); r = __archive_read_register_format(a, mtree, "mtree", mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL); if (r != ARCHIVE_OK) free(mtree); return (ARCHIVE_OK); } static int cleanup(struct archive_read *a) { struct mtree *mtree; struct mtree_entry *p, *q; mtree = (struct mtree *)(a->format->data); p = mtree->entries; while (p != NULL) { q = p->next; free(p->name); free_options(p->options); free(p); p = q; } archive_string_free(&mtree->line); archive_string_free(&mtree->current_dir); archive_string_free(&mtree->contents_name); archive_entry_linkresolver_free(mtree->resolver); free(mtree->buff); free(mtree); (a->format->data) = NULL; return (ARCHIVE_OK); } static ssize_t get_line_size(const char *b, ssize_t avail, ssize_t *nlsize) { ssize_t len; len = 0; while (len < avail) { switch (*b) { case '\0':/* Non-ascii character or control character. */ if (nlsize != NULL) *nlsize = 0; return (-1); case '\r': if (avail-len > 1 && b[1] == '\n') { if (nlsize != NULL) *nlsize = 2; return (len+2); } /* FALL THROUGH */ case '\n': if (nlsize != NULL) *nlsize = 1; return (len+1); default: b++; len++; break; } } if (nlsize != NULL) *nlsize = 0; return (avail); } /* * <---------------- ravail ---------------------> * <-- diff ------> <--- avail -----------------> * <---- len -----------> * | Previous lines | line being parsed nl extra | * ^ * b * */ static ssize_t next_line(struct archive_read *a, const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl) { ssize_t len; int quit; quit = 0; if (*avail == 0) { *nl = 0; len = 0; } else len = get_line_size(*b, *avail, nl); /* * Read bytes more while it does not reach the end of line. */ while (*nl == 0 && len == *avail && !quit) { ssize_t diff = *ravail - *avail; size_t nbytes_req = (*ravail+1023) & ~1023U; ssize_t tested; /* * Place an arbitrary limit on the line length. * mtree is almost free-form input and without line length limits, * it can consume a lot of memory. */ if (len >= MAX_LINE_LEN) return (-1); /* Increase reading bytes if it is not enough to at least * new two lines. */ if (nbytes_req < (size_t)*ravail + 160) nbytes_req <<= 1; *b = __archive_read_ahead(a, nbytes_req, avail); if (*b == NULL) { if (*ravail >= *avail) return (0); /* Reading bytes reaches the end of file. */ *b = __archive_read_ahead(a, *avail, avail); quit = 1; } *ravail = *avail; *b += diff; *avail -= diff; tested = len;/* Skip some bytes we already determined. */ len = get_line_size(*b + len, *avail - len, nl); if (len >= 0) len += tested; } return (len); } /* * Compare characters with a mtree keyword. * Returns the length of a mtree keyword if matched. * Returns 0 if not matched. */ static int bid_keycmp(const char *p, const char *key, ssize_t len) { int match_len = 0; while (len > 0 && *p && *key) { if (*p == *key) { --len; ++p; ++key; ++match_len; continue; } return (0);/* Not match */ } if (*key != '\0') return (0);/* Not match */ /* A following character should be specified characters */ if (p[0] == '=' || p[0] == ' ' || p[0] == '\t' || p[0] == '\n' || p[0] == '\r' || (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))) return (match_len); return (0);/* Not match */ } /* * Test whether the characters 'p' has is mtree keyword. * Returns the length of a detected keyword. * Returns 0 if any keywords were not found. */ static int bid_keyword(const char *p, ssize_t len) { static const char * const keys_c[] = { "content", "contents", "cksum", NULL }; static const char * const keys_df[] = { "device", "flags", NULL }; static const char * const keys_g[] = { "gid", "gname", NULL }; static const char * const keys_il[] = { "ignore", "inode", "link", NULL }; static const char * const keys_m[] = { "md5", "md5digest", "mode", NULL }; static const char * const keys_no[] = { "nlink", "nochange", "optional", NULL }; static const char * const keys_r[] = { "resdevice", "rmd160", "rmd160digest", NULL }; static const char * const keys_s[] = { "sha1", "sha1digest", "sha256", "sha256digest", "sha384", "sha384digest", "sha512", "sha512digest", "size", NULL }; static const char * const keys_t[] = { "tags", "time", "type", NULL }; static const char * const keys_u[] = { "uid", "uname", NULL }; const char * const *keys; int i; switch (*p) { case 'c': keys = keys_c; break; case 'd': case 'f': keys = keys_df; break; case 'g': keys = keys_g; break; case 'i': case 'l': keys = keys_il; break; case 'm': keys = keys_m; break; case 'n': case 'o': keys = keys_no; break; case 'r': keys = keys_r; break; case 's': keys = keys_s; break; case 't': keys = keys_t; break; case 'u': keys = keys_u; break; default: return (0);/* Unknown key */ } for (i = 0; keys[i] != NULL; i++) { int l = bid_keycmp(p, keys[i], len); if (l > 0) return (l); } return (0);/* Unknown key */ } /* * Test whether there is a set of mtree keywords. * Returns the number of keyword. * Returns -1 if we got incorrect sequence. * This function expects a set of "keyword=value". * When "unset" is specified, expects a set of "keyword". */ static int bid_keyword_list(const char *p, ssize_t len, int unset, int last_is_path) { int l; int keycnt = 0; while (len > 0 && *p) { int blank = 0; /* Test whether there are blank characters in the line. */ while (len >0 && (*p == ' ' || *p == '\t')) { ++p; --len; blank = 1; } if (*p == '\n' || *p == '\r') break; if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r')) break; if (!blank && !last_is_path) /* No blank character. */ return (-1); if (last_is_path && len == 0) return (keycnt); if (unset) { l = bid_keycmp(p, "all", len); if (l > 0) return (1); } /* Test whether there is a correct key in the line. */ l = bid_keyword(p, len); if (l == 0) return (-1);/* Unknown keyword was found. */ p += l; len -= l; keycnt++; /* Skip value */ if (*p == '=') { int value = 0; ++p; --len; while (len > 0 && *p != ' ' && *p != '\t') { ++p; --len; value = 1; } /* A keyword should have a its value unless * "/unset" operation. */ if (!unset && value == 0) return (-1); } } return (keycnt); } static int bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path) { int f = 0; static const unsigned char safe_char[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */ /* !"$%&'()*+,-./ EXCLUSION:( )(#) */ 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */ /* 0123456789:;<>? EXCLUSION:(=) */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */ /* @ABCDEFGHIJKLMNO */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */ /* PQRSTUVWXYZ[\]^_ */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */ /* `abcdefghijklmno */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */ /* pqrstuvwxyz{|}~ */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */ }; ssize_t ll; const char *pp = p; const char * const pp_end = pp + len; *last_is_path = 0; /* * Skip the path-name which is quoted. */ for (;pp < pp_end; ++pp) { if (!safe_char[*(const unsigned char *)pp]) { if (*pp != ' ' && *pp != '\t' && *pp != '\r' && *pp != '\n') f = 0; break; } f = 1; } ll = pp_end - pp; /* If a path-name was not found at the first, try to check * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates, * which places the path-name at the last. */ if (f == 0) { const char *pb = p + len - nl; int name_len = 0; int slash; /* The form D accepts only a single line for an entry. */ if (pb-2 >= p && pb[-1] == '\\' && (pb[-2] == ' ' || pb[-2] == '\t')) return (-1); if (pb-1 >= p && pb[-1] == '\\') return (-1); slash = 0; while (p <= --pb && *pb != ' ' && *pb != '\t') { if (!safe_char[*(const unsigned char *)pb]) return (-1); name_len++; /* The pathname should have a slash in this * format. */ if (*pb == '/') slash = 1; } if (name_len == 0 || slash == 0) return (-1); /* If '/' is placed at the first in this field, this is not * a valid filename. */ if (pb[1] == '/') return (-1); ll = len - nl - name_len; pp = p; *last_is_path = 1; } return (bid_keyword_list(pp, ll, 0, *last_is_path)); } #define MAX_BID_ENTRY 3 static int mtree_bid(struct archive_read *a, int best_bid) { const char *signature = "#mtree"; const char *p; (void)best_bid; /* UNUSED */ /* Now let's look at the actual header and see if it matches. */ p = __archive_read_ahead(a, strlen(signature), NULL); if (p == NULL) return (-1); if (memcmp(p, signature, strlen(signature)) == 0) return (8 * (int)strlen(signature)); /* * There is not a mtree signature. Let's try to detect mtree format. */ return (detect_form(a, NULL)); } static int detect_form(struct archive_read *a, int *is_form_d) { const char *p; ssize_t avail, ravail; - ssize_t detected_bytes = 0, len, nl; + ssize_t len, nl; int entry_cnt = 0, multiline = 0; int form_D = 0;/* The archive is generated by `NetBSD mtree -D' * (In this source we call it `form D') . */ if (is_form_d != NULL) *is_form_d = 0; p = __archive_read_ahead(a, 1, &avail); if (p == NULL) return (-1); ravail = avail; for (;;) { len = next_line(a, &p, &avail, &ravail, &nl); /* The terminal character of the line should be * a new line character, '\r\n' or '\n'. */ if (len <= 0 || nl == 0) break; if (!multiline) { /* Leading whitespace is never significant, * ignore it. */ while (len > 0 && (*p == ' ' || *p == '\t')) { ++p; --avail; --len; } /* Skip comment or empty line. */ if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') { p += len; avail -= len; continue; } } else { /* A continuance line; the terminal * character of previous line was '\' character. */ if (bid_keyword_list(p, len, 0, 0) <= 0) break; - if (multiline == 1) - detected_bytes += len; if (p[len-nl-1] != '\\') { if (multiline == 1 && ++entry_cnt >= MAX_BID_ENTRY) break; multiline = 0; } p += len; avail -= len; continue; } if (p[0] != '/') { int last_is_path, keywords; keywords = bid_entry(p, len, nl, &last_is_path); if (keywords >= 0) { - detected_bytes += len; if (form_D == 0) { if (last_is_path) form_D = 1; else if (keywords > 0) /* This line is not `form D'. */ form_D = -1; } else if (form_D == 1) { if (!last_is_path && keywords > 0) /* This this is not `form D' * and We cannot accept mixed * format. */ break; } if (!last_is_path && p[len-nl-1] == '\\') /* This line continues. */ multiline = 1; else { /* We've got plenty of correct lines * to assume that this file is a mtree * format. */ if (++entry_cnt >= MAX_BID_ENTRY) break; } } else break; } else if (len > 4 && strncmp(p, "/set", 4) == 0) { if (bid_keyword_list(p+4, len-4, 0, 0) <= 0) break; /* This line continues. */ if (p[len-nl-1] == '\\') multiline = 2; } else if (len > 6 && strncmp(p, "/unset", 6) == 0) { if (bid_keyword_list(p+6, len-6, 1, 0) <= 0) break; /* This line continues. */ if (p[len-nl-1] == '\\') multiline = 2; } else break; /* Test next line. */ p += len; avail -= len; } if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) { if (is_form_d != NULL) { if (form_D == 1) *is_form_d = 1; } return (32); } return (0); } /* * The extended mtree format permits multiple lines specifying * attributes for each file. For those entries, only the last line * is actually used. Practically speaking, that means we have * to read the entire mtree file into memory up front. * * The parsing is done in two steps. First, it is decided if a line * changes the global defaults and if it is, processed accordingly. * Otherwise, the options of the line are merged with the current * global options. */ static int add_option(struct archive_read *a, struct mtree_option **global, const char *value, size_t len) { struct mtree_option *opt; if ((opt = malloc(sizeof(*opt))) == NULL) { archive_set_error(&a->archive, errno, "Can't allocate memory"); return (ARCHIVE_FATAL); } if ((opt->value = malloc(len + 1)) == NULL) { free(opt); archive_set_error(&a->archive, errno, "Can't allocate memory"); return (ARCHIVE_FATAL); } memcpy(opt->value, value, len); opt->value[len] = '\0'; opt->next = *global; *global = opt; return (ARCHIVE_OK); } static void remove_option(struct mtree_option **global, const char *value, size_t len) { struct mtree_option *iter, *last; last = NULL; for (iter = *global; iter != NULL; last = iter, iter = iter->next) { if (strncmp(iter->value, value, len) == 0 && (iter->value[len] == '\0' || iter->value[len] == '=')) break; } if (iter == NULL) return; if (last == NULL) *global = iter->next; else last->next = iter->next; free(iter->value); free(iter); } static int process_global_set(struct archive_read *a, struct mtree_option **global, const char *line) { const char *next, *eq; size_t len; int r; line += 4; for (;;) { next = line + strspn(line, " \t\r\n"); if (*next == '\0') return (ARCHIVE_OK); line = next; next = line + strcspn(line, " \t\r\n"); eq = strchr(line, '='); if (eq > next) len = next - line; else len = eq - line; remove_option(global, line, len); r = add_option(a, global, line, next - line); if (r != ARCHIVE_OK) return (r); line = next; } } static int process_global_unset(struct archive_read *a, struct mtree_option **global, const char *line) { const char *next; size_t len; line += 6; if (strchr(line, '=') != NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "/unset shall not contain `='"); return ARCHIVE_FATAL; } for (;;) { next = line + strspn(line, " \t\r\n"); if (*next == '\0') return (ARCHIVE_OK); line = next; len = strcspn(line, " \t\r\n"); if (len == 3 && strncmp(line, "all", 3) == 0) { free_options(*global); *global = NULL; } else { remove_option(global, line, len); } line += len; } } static int process_add_entry(struct archive_read *a, struct mtree *mtree, struct mtree_option **global, const char *line, ssize_t line_len, struct mtree_entry **last_entry, int is_form_d) { struct mtree_entry *entry; struct mtree_option *iter; const char *next, *eq, *name, *end; size_t name_len, len; int r, i; if ((entry = malloc(sizeof(*entry))) == NULL) { archive_set_error(&a->archive, errno, "Can't allocate memory"); return (ARCHIVE_FATAL); } entry->next = NULL; entry->options = NULL; entry->name = NULL; entry->used = 0; entry->full = 0; /* Add this entry to list. */ if (*last_entry == NULL) mtree->entries = entry; else (*last_entry)->next = entry; *last_entry = entry; if (is_form_d) { /* Filename is last item on line. */ /* Adjust line_len to trim trailing whitespace */ while (line_len > 0) { char last_character = line[line_len - 1]; if (last_character == '\r' || last_character == '\n' || last_character == '\t' || last_character == ' ') { line_len--; } else { break; } } /* Name starts after the last whitespace separator */ name = line; for (i = 0; i < line_len; i++) { if (line[i] == '\r' || line[i] == '\n' || line[i] == '\t' || line[i] == ' ') { name = line + i + 1; } } name_len = line + line_len - name; end = name; } else { /* Filename is first item on line */ name_len = strcspn(line, " \t\r\n"); name = line; line += name_len; end = line + line_len; } /* name/name_len is the name within the line. */ /* line..end brackets the entire line except the name */ if ((entry->name = malloc(name_len + 1)) == NULL) { archive_set_error(&a->archive, errno, "Can't allocate memory"); return (ARCHIVE_FATAL); } memcpy(entry->name, name, name_len); entry->name[name_len] = '\0'; parse_escapes(entry->name, entry); entry->next_dup = NULL; if (entry->full) { if (!__archive_rb_tree_insert_node(&mtree->rbtree, &entry->rbnode)) { struct mtree_entry *alt; alt = (struct mtree_entry *)__archive_rb_tree_find_node( &mtree->rbtree, entry->name); while (alt->next_dup) alt = alt->next_dup; alt->next_dup = entry; } } for (iter = *global; iter != NULL; iter = iter->next) { r = add_option(a, &entry->options, iter->value, strlen(iter->value)); if (r != ARCHIVE_OK) return (r); } for (;;) { next = line + strspn(line, " \t\r\n"); if (*next == '\0') return (ARCHIVE_OK); if (next >= end) return (ARCHIVE_OK); line = next; next = line + strcspn(line, " \t\r\n"); eq = strchr(line, '='); if (eq == NULL || eq > next) len = next - line; else len = eq - line; remove_option(&entry->options, line, len); r = add_option(a, &entry->options, line, next - line); if (r != ARCHIVE_OK) return (r); line = next; } } static int read_mtree(struct archive_read *a, struct mtree *mtree) { ssize_t len; uintmax_t counter; char *p, *s; struct mtree_option *global; struct mtree_entry *last_entry; int r, is_form_d; mtree->archive_format = ARCHIVE_FORMAT_MTREE; mtree->archive_format_name = "mtree"; global = NULL; last_entry = NULL; (void)detect_form(a, &is_form_d); for (counter = 1; ; ++counter) { r = ARCHIVE_OK; len = readline(a, mtree, &p, 65536); if (len == 0) { mtree->this_entry = mtree->entries; free_options(global); return (ARCHIVE_OK); } if (len < 0) { free_options(global); return ((int)len); } /* Leading whitespace is never significant, ignore it. */ while (*p == ' ' || *p == '\t') { ++p; --len; } /* Skip content lines and blank lines. */ if (*p == '#') continue; if (*p == '\r' || *p == '\n' || *p == '\0') continue; /* Non-printable characters are not allowed */ for (s = p;s < p + len - 1; s++) { if (!isprint((unsigned char)*s)) { r = ARCHIVE_FATAL; break; } } if (r != ARCHIVE_OK) break; if (*p != '/') { r = process_add_entry(a, mtree, &global, p, len, &last_entry, is_form_d); } else if (len > 4 && strncmp(p, "/set", 4) == 0) { if (p[4] != ' ' && p[4] != '\t') break; r = process_global_set(a, &global, p); } else if (len > 6 && strncmp(p, "/unset", 6) == 0) { if (p[6] != ' ' && p[6] != '\t') break; r = process_global_unset(a, &global, p); } else break; if (r != ARCHIVE_OK) { free_options(global); return r; } } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't parse line %ju", counter); free_options(global); return (ARCHIVE_FATAL); } /* * Read in the entire mtree file into memory on the first request. * Then use the next unused file to satisfy each header request. */ static int read_header(struct archive_read *a, struct archive_entry *entry) { struct mtree *mtree; char *p; int r, use_next; mtree = (struct mtree *)(a->format->data); if (mtree->fd >= 0) { close(mtree->fd); mtree->fd = -1; } if (mtree->entries == NULL) { mtree->resolver = archive_entry_linkresolver_new(); if (mtree->resolver == NULL) return ARCHIVE_FATAL; archive_entry_linkresolver_set_strategy(mtree->resolver, ARCHIVE_FORMAT_MTREE); r = read_mtree(a, mtree); if (r != ARCHIVE_OK) return (r); } a->archive.archive_format = mtree->archive_format; a->archive.archive_format_name = mtree->archive_format_name; for (;;) { if (mtree->this_entry == NULL) return (ARCHIVE_EOF); if (strcmp(mtree->this_entry->name, "..") == 0) { mtree->this_entry->used = 1; if (archive_strlen(&mtree->current_dir) > 0) { /* Roll back current path. */ p = mtree->current_dir.s + mtree->current_dir.length - 1; while (p >= mtree->current_dir.s && *p != '/') --p; if (p >= mtree->current_dir.s) --p; mtree->current_dir.length = p - mtree->current_dir.s + 1; } } if (!mtree->this_entry->used) { use_next = 0; r = parse_file(a, entry, mtree, mtree->this_entry, &use_next); if (use_next == 0) return (r); } mtree->this_entry = mtree->this_entry->next; } } /* * A single file can have multiple lines contribute specifications. * Parse as many lines as necessary, then pull additional information * from a backing file on disk as necessary. */ static int parse_file(struct archive_read *a, struct archive_entry *entry, struct mtree *mtree, struct mtree_entry *mentry, int *use_next) { const char *path; struct stat st_storage, *st; struct mtree_entry *mp; struct archive_entry *sparse_entry; int r = ARCHIVE_OK, r1, parsed_kws; mentry->used = 1; /* Initialize reasonable defaults. */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); archive_string_empty(&mtree->contents_name); /* Parse options from this line. */ parsed_kws = 0; r = parse_line(a, entry, mtree, mentry, &parsed_kws); if (mentry->full) { archive_entry_copy_pathname(entry, mentry->name); /* * "Full" entries are allowed to have multiple lines * and those lines aren't required to be adjacent. We * don't support multiple lines for "relative" entries * nor do we make any attempt to merge data from * separate "relative" and "full" entries. (Merging * "relative" and "full" entries would require dealing * with pathname canonicalization, which is a very * tricky subject.) */ mp = (struct mtree_entry *)__archive_rb_tree_find_node( &mtree->rbtree, mentry->name); for (; mp; mp = mp->next_dup) { if (mp->full && !mp->used) { /* Later lines override earlier ones. */ mp->used = 1; r1 = parse_line(a, entry, mtree, mp, &parsed_kws); if (r1 < r) r = r1; } } } else { /* * Relative entries require us to construct * the full path and possibly update the * current directory. */ size_t n = archive_strlen(&mtree->current_dir); if (n > 0) archive_strcat(&mtree->current_dir, "/"); archive_strcat(&mtree->current_dir, mentry->name); archive_entry_copy_pathname(entry, mtree->current_dir.s); if (archive_entry_filetype(entry) != AE_IFDIR) mtree->current_dir.length = n; } if (mtree->checkfs) { /* * Try to open and stat the file to get the real size * and other file info. It would be nice to avoid * this here so that getting a listing of an mtree * wouldn't require opening every referenced contents * file. But then we wouldn't know the actual * contents size, so I don't see a really viable way * around this. (Also, we may want to someday pull * other unspecified info from the contents file on * disk.) */ mtree->fd = -1; if (archive_strlen(&mtree->contents_name) > 0) path = mtree->contents_name.s; else path = archive_entry_pathname(entry); if (archive_entry_filetype(entry) == AE_IFREG || archive_entry_filetype(entry) == AE_IFDIR) { mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC); __archive_ensure_cloexec_flag(mtree->fd); if (mtree->fd == -1 && (errno != ENOENT || archive_strlen(&mtree->contents_name) > 0)) { archive_set_error(&a->archive, errno, "Can't open %s", path); r = ARCHIVE_WARN; } } st = &st_storage; if (mtree->fd >= 0) { if (fstat(mtree->fd, st) == -1) { archive_set_error(&a->archive, errno, "Could not fstat %s", path); r = ARCHIVE_WARN; /* If we can't stat it, don't keep it open. */ close(mtree->fd); mtree->fd = -1; st = NULL; } } else if (lstat(path, st) == -1) { st = NULL; } /* * Check for a mismatch between the type in the specification * and the type of the contents object on disk. */ if (st != NULL) { if (((st->st_mode & S_IFMT) == S_IFREG && archive_entry_filetype(entry) == AE_IFREG) #ifdef S_IFLNK ||((st->st_mode & S_IFMT) == S_IFLNK && archive_entry_filetype(entry) == AE_IFLNK) #endif #ifdef S_IFSOCK ||((st->st_mode & S_IFSOCK) == S_IFSOCK && archive_entry_filetype(entry) == AE_IFSOCK) #endif #ifdef S_IFCHR ||((st->st_mode & S_IFMT) == S_IFCHR && archive_entry_filetype(entry) == AE_IFCHR) #endif #ifdef S_IFBLK ||((st->st_mode & S_IFMT) == S_IFBLK && archive_entry_filetype(entry) == AE_IFBLK) #endif ||((st->st_mode & S_IFMT) == S_IFDIR && archive_entry_filetype(entry) == AE_IFDIR) #ifdef S_IFIFO ||((st->st_mode & S_IFMT) == S_IFIFO && archive_entry_filetype(entry) == AE_IFIFO) #endif ) { /* Types match. */ } else { /* Types don't match; bail out gracefully. */ if (mtree->fd >= 0) close(mtree->fd); mtree->fd = -1; if (parsed_kws & MTREE_HAS_OPTIONAL) { /* It's not an error for an optional * entry to not match disk. */ *use_next = 1; } else if (r == ARCHIVE_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "mtree specification has different" " type for %s", archive_entry_pathname(entry)); r = ARCHIVE_WARN; } return (r); } } /* * If there is a contents file on disk, pick some of the * metadata from that file. For most of these, we only * set it from the contents if it wasn't already parsed * from the specification. */ if (st != NULL) { if (((parsed_kws & MTREE_HAS_DEVICE) == 0 || (parsed_kws & MTREE_HAS_NOCHANGE) != 0) && (archive_entry_filetype(entry) == AE_IFCHR || archive_entry_filetype(entry) == AE_IFBLK)) archive_entry_set_rdev(entry, st->st_rdev); if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME)) == 0 || (parsed_kws & MTREE_HAS_NOCHANGE) != 0) archive_entry_set_gid(entry, st->st_gid); if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME)) == 0 || (parsed_kws & MTREE_HAS_NOCHANGE) != 0) archive_entry_set_uid(entry, st->st_uid); if ((parsed_kws & MTREE_HAS_MTIME) == 0 || (parsed_kws & MTREE_HAS_NOCHANGE) != 0) { #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC archive_entry_set_mtime(entry, st->st_mtime, st->st_mtimespec.tv_nsec); #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC archive_entry_set_mtime(entry, st->st_mtime, st->st_mtim.tv_nsec); #elif HAVE_STRUCT_STAT_ST_MTIME_N archive_entry_set_mtime(entry, st->st_mtime, st->st_mtime_n); #elif HAVE_STRUCT_STAT_ST_UMTIME archive_entry_set_mtime(entry, st->st_mtime, st->st_umtime*1000); #elif HAVE_STRUCT_STAT_ST_MTIME_USEC archive_entry_set_mtime(entry, st->st_mtime, st->st_mtime_usec*1000); #else archive_entry_set_mtime(entry, st->st_mtime, 0); #endif } if ((parsed_kws & MTREE_HAS_NLINK) == 0 || (parsed_kws & MTREE_HAS_NOCHANGE) != 0) archive_entry_set_nlink(entry, st->st_nlink); if ((parsed_kws & MTREE_HAS_PERM) == 0 || (parsed_kws & MTREE_HAS_NOCHANGE) != 0) archive_entry_set_perm(entry, st->st_mode); if ((parsed_kws & MTREE_HAS_SIZE) == 0 || (parsed_kws & MTREE_HAS_NOCHANGE) != 0) archive_entry_set_size(entry, st->st_size); archive_entry_set_ino(entry, st->st_ino); archive_entry_set_dev(entry, st->st_dev); archive_entry_linkify(mtree->resolver, &entry, &sparse_entry); } else if (parsed_kws & MTREE_HAS_OPTIONAL) { /* * Couldn't open the entry, stat it or the on-disk type * didn't match. If this entry is optional, just * ignore it and read the next header entry. */ *use_next = 1; return ARCHIVE_OK; } } mtree->cur_size = archive_entry_size(entry); mtree->offset = 0; return r; } /* * Each line contains a sequence of keywords. */ static int parse_line(struct archive_read *a, struct archive_entry *entry, struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws) { struct mtree_option *iter; int r = ARCHIVE_OK, r1; for (iter = mp->options; iter != NULL; iter = iter->next) { r1 = parse_keyword(a, mtree, entry, iter, parsed_kws); if (r1 < r) r = r1; } if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Missing type keyword in mtree specification"); return (ARCHIVE_WARN); } return (r); } /* * Device entries have one of the following forms: * - raw dev_t * - format,major,minor[,subdevice] * When parsing succeeded, `pdev' will contain the appropriate dev_t value. */ /* strsep() is not in C90, but strcspn() is. */ /* Taken from http://unixpapa.com/incnote/string.html */ static char * la_strsep(char **sp, const char *sep) { char *p, *s; if (sp == NULL || *sp == NULL || **sp == '\0') return(NULL); s = *sp; p = s + strcspn(s, sep); if (*p != '\0') *p++ = '\0'; *sp = p; return(s); } static int parse_device(dev_t *pdev, struct archive *a, char *val) { #define MAX_PACK_ARGS 3 unsigned long numbers[MAX_PACK_ARGS]; char *p, *dev; int argc; pack_t *pack; dev_t result; const char *error = NULL; memset(pdev, 0, sizeof(*pdev)); if ((dev = strchr(val, ',')) != NULL) { /* * Device's major/minor are given in a specified format. * Decode and pack it accordingly. */ *dev++ = '\0'; if ((pack = pack_find(val)) == NULL) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Unknown format `%s'", val); return ARCHIVE_WARN; } argc = 0; while ((p = la_strsep(&dev, ",")) != NULL) { if (*p == '\0') { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Missing number"); return ARCHIVE_WARN; } if (argc >= MAX_PACK_ARGS) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Too many arguments"); return ARCHIVE_WARN; } numbers[argc++] = (unsigned long)mtree_atol(&p, 0); } if (argc < 2) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Not enough arguments"); return ARCHIVE_WARN; } result = (*pack)(argc, numbers, &error); if (error != NULL) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "%s", error); return ARCHIVE_WARN; } } else { /* file system raw value. */ result = (dev_t)mtree_atol(&val, 0); } *pdev = result; return ARCHIVE_OK; #undef MAX_PACK_ARGS } static int parse_hex_nibble(char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return 10 + c - 'a'; #if 0 /* XXX: Is uppercase something we should support? */ if (c >= 'A' && c <= 'F') return 10 + c - 'A'; #endif return -1; } static int parse_digest(struct archive_read *a, struct archive_entry *entry, const char *digest, int type) { unsigned char digest_buf[64]; int high, low; size_t i, j, len; switch (type) { case ARCHIVE_ENTRY_DIGEST_MD5: len = sizeof(entry->digest.md5); break; case ARCHIVE_ENTRY_DIGEST_RMD160: len = sizeof(entry->digest.rmd160); break; case ARCHIVE_ENTRY_DIGEST_SHA1: len = sizeof(entry->digest.sha1); break; case ARCHIVE_ENTRY_DIGEST_SHA256: len = sizeof(entry->digest.sha256); break; case ARCHIVE_ENTRY_DIGEST_SHA384: len = sizeof(entry->digest.sha384); break; case ARCHIVE_ENTRY_DIGEST_SHA512: len = sizeof(entry->digest.sha512); break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Internal error: Unknown digest type"); return ARCHIVE_FATAL; } if (len > sizeof(digest_buf)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Internal error: Digest storage too large"); return ARCHIVE_FATAL; } len *= 2; if (mtree_strnlen(digest, len+1) != len) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "incorrect digest length, ignoring"); return ARCHIVE_WARN; } for (i = 0, j = 0; i < len; i += 2, j++) { high = parse_hex_nibble(digest[i]); low = parse_hex_nibble(digest[i+1]); if (high == -1 || low == -1) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "invalid digest data, ignoring"); return ARCHIVE_WARN; } digest_buf[j] = high << 4 | low; } return archive_entry_set_digest(entry, type, digest_buf); } /* * Parse a single keyword and its value. */ static int parse_keyword(struct archive_read *a, struct mtree *mtree, struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws) { char *val, *key; key = opt->value; if (*key == '\0') return (ARCHIVE_OK); if (strcmp(key, "nochange") == 0) { *parsed_kws |= MTREE_HAS_NOCHANGE; return (ARCHIVE_OK); } if (strcmp(key, "optional") == 0) { *parsed_kws |= MTREE_HAS_OPTIONAL; return (ARCHIVE_OK); } if (strcmp(key, "ignore") == 0) { /* * The mtree processing is not recursive, so * recursion will only happen for explicitly listed * entries. */ return (ARCHIVE_OK); } val = strchr(key, '='); if (val == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Malformed attribute \"%s\" (%d)", key, key[0]); return (ARCHIVE_WARN); } *val = '\0'; ++val; switch (key[0]) { case 'c': if (strcmp(key, "content") == 0 || strcmp(key, "contents") == 0) { parse_escapes(val, NULL); archive_strcpy(&mtree->contents_name, val); return (ARCHIVE_OK); } if (strcmp(key, "cksum") == 0) return (ARCHIVE_OK); break; case 'd': if (strcmp(key, "device") == 0) { /* stat(2) st_rdev field, e.g. the major/minor IDs * of a char/block special file */ int r; dev_t dev; *parsed_kws |= MTREE_HAS_DEVICE; r = parse_device(&dev, &a->archive, val); if (r == ARCHIVE_OK) archive_entry_set_rdev(entry, dev); return r; } break; case 'f': if (strcmp(key, "flags") == 0) { *parsed_kws |= MTREE_HAS_FFLAGS; archive_entry_copy_fflags_text(entry, val); return (ARCHIVE_OK); } break; case 'g': if (strcmp(key, "gid") == 0) { *parsed_kws |= MTREE_HAS_GID; archive_entry_set_gid(entry, mtree_atol(&val, 10)); return (ARCHIVE_OK); } if (strcmp(key, "gname") == 0) { *parsed_kws |= MTREE_HAS_GNAME; archive_entry_copy_gname(entry, val); return (ARCHIVE_OK); } break; case 'i': if (strcmp(key, "inode") == 0) { archive_entry_set_ino(entry, mtree_atol(&val, 10)); return (ARCHIVE_OK); } break; case 'l': if (strcmp(key, "link") == 0) { parse_escapes(val, NULL); archive_entry_copy_symlink(entry, val); return (ARCHIVE_OK); } break; case 'm': if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0) { return parse_digest(a, entry, val, ARCHIVE_ENTRY_DIGEST_MD5); } if (strcmp(key, "mode") == 0) { if (val[0] < '0' || val[0] > '7') { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Symbolic or non-octal mode \"%s\" unsupported", val); return (ARCHIVE_WARN); } *parsed_kws |= MTREE_HAS_PERM; archive_entry_set_perm(entry, (mode_t)mtree_atol(&val, 8)); return (ARCHIVE_OK); } break; case 'n': if (strcmp(key, "nlink") == 0) { *parsed_kws |= MTREE_HAS_NLINK; archive_entry_set_nlink(entry, (unsigned int)mtree_atol(&val, 10)); return (ARCHIVE_OK); } break; case 'r': if (strcmp(key, "resdevice") == 0) { /* stat(2) st_dev field, e.g. the device ID where the * inode resides */ int r; dev_t dev; r = parse_device(&dev, &a->archive, val); if (r == ARCHIVE_OK) archive_entry_set_dev(entry, dev); return r; } if (strcmp(key, "rmd160") == 0 || strcmp(key, "rmd160digest") == 0) { return parse_digest(a, entry, val, ARCHIVE_ENTRY_DIGEST_RMD160); } break; case 's': if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0) { return parse_digest(a, entry, val, ARCHIVE_ENTRY_DIGEST_SHA1); } if (strcmp(key, "sha256") == 0 || strcmp(key, "sha256digest") == 0) { return parse_digest(a, entry, val, ARCHIVE_ENTRY_DIGEST_SHA256); } if (strcmp(key, "sha384") == 0 || strcmp(key, "sha384digest") == 0) { return parse_digest(a, entry, val, ARCHIVE_ENTRY_DIGEST_SHA384); } if (strcmp(key, "sha512") == 0 || strcmp(key, "sha512digest") == 0) { return parse_digest(a, entry, val, ARCHIVE_ENTRY_DIGEST_SHA512); } if (strcmp(key, "size") == 0) { archive_entry_set_size(entry, mtree_atol(&val, 10)); return (ARCHIVE_OK); } break; case 't': if (strcmp(key, "tags") == 0) { /* * Comma delimited list of tags. * Ignore the tags for now, but the interface * should be extended to allow inclusion/exclusion. */ return (ARCHIVE_OK); } if (strcmp(key, "time") == 0) { int64_t m; int64_t my_time_t_max = get_time_t_max(); int64_t my_time_t_min = get_time_t_min(); long ns = 0; *parsed_kws |= MTREE_HAS_MTIME; m = mtree_atol(&val, 10); /* Replicate an old mtree bug: * 123456789.1 represents 123456789 * seconds and 1 nanosecond. */ if (*val == '.') { ++val; ns = (long)mtree_atol(&val, 10); if (ns < 0) ns = 0; else if (ns > 999999999) ns = 999999999; } if (m > my_time_t_max) m = my_time_t_max; else if (m < my_time_t_min) m = my_time_t_min; archive_entry_set_mtime(entry, (time_t)m, ns); return (ARCHIVE_OK); } if (strcmp(key, "type") == 0) { switch (val[0]) { case 'b': if (strcmp(val, "block") == 0) { *parsed_kws |= MTREE_HAS_TYPE; archive_entry_set_filetype(entry, AE_IFBLK); return (ARCHIVE_OK); } break; case 'c': if (strcmp(val, "char") == 0) { *parsed_kws |= MTREE_HAS_TYPE; archive_entry_set_filetype(entry, AE_IFCHR); return (ARCHIVE_OK); } break; case 'd': if (strcmp(val, "dir") == 0) { *parsed_kws |= MTREE_HAS_TYPE; archive_entry_set_filetype(entry, AE_IFDIR); return (ARCHIVE_OK); } break; case 'f': if (strcmp(val, "fifo") == 0) { *parsed_kws |= MTREE_HAS_TYPE; archive_entry_set_filetype(entry, AE_IFIFO); return (ARCHIVE_OK); } if (strcmp(val, "file") == 0) { *parsed_kws |= MTREE_HAS_TYPE; archive_entry_set_filetype(entry, AE_IFREG); return (ARCHIVE_OK); } break; case 'l': if (strcmp(val, "link") == 0) { *parsed_kws |= MTREE_HAS_TYPE; archive_entry_set_filetype(entry, AE_IFLNK); return (ARCHIVE_OK); } break; default: break; } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unrecognized file type \"%s\"; " "assuming \"file\"", val); archive_entry_set_filetype(entry, AE_IFREG); return (ARCHIVE_WARN); } break; case 'u': if (strcmp(key, "uid") == 0) { *parsed_kws |= MTREE_HAS_UID; archive_entry_set_uid(entry, mtree_atol(&val, 10)); return (ARCHIVE_OK); } if (strcmp(key, "uname") == 0) { *parsed_kws |= MTREE_HAS_UNAME; archive_entry_copy_uname(entry, val); return (ARCHIVE_OK); } break; default: break; } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unrecognized key %s=%s", key, val); return (ARCHIVE_WARN); } static int read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { size_t bytes_to_read; ssize_t bytes_read; struct mtree *mtree; mtree = (struct mtree *)(a->format->data); if (mtree->fd < 0) { *buff = NULL; *offset = 0; *size = 0; return (ARCHIVE_EOF); } if (mtree->buff == NULL) { mtree->buffsize = 64 * 1024; mtree->buff = malloc(mtree->buffsize); if (mtree->buff == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory"); return (ARCHIVE_FATAL); } } *buff = mtree->buff; *offset = mtree->offset; if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset) bytes_to_read = (size_t)(mtree->cur_size - mtree->offset); else bytes_to_read = mtree->buffsize; bytes_read = read(mtree->fd, mtree->buff, bytes_to_read); if (bytes_read < 0) { archive_set_error(&a->archive, errno, "Can't read"); return (ARCHIVE_WARN); } if (bytes_read == 0) { *size = 0; return (ARCHIVE_EOF); } mtree->offset += bytes_read; *size = bytes_read; return (ARCHIVE_OK); } /* Skip does nothing except possibly close the contents file. */ static int skip(struct archive_read *a) { struct mtree *mtree; mtree = (struct mtree *)(a->format->data); if (mtree->fd >= 0) { close(mtree->fd); mtree->fd = -1; } return (ARCHIVE_OK); } /* * Since parsing backslash sequences always makes strings shorter, * we can always do this conversion in-place. */ static void parse_escapes(char *src, struct mtree_entry *mentry) { char *dest = src; char c; if (mentry != NULL && strcmp(src, ".") == 0) mentry->full = 1; while (*src != '\0') { c = *src++; if (c == '/' && mentry != NULL) mentry->full = 1; if (c == '\\') { switch (src[0]) { case '0': if (src[1] < '0' || src[1] > '7') { c = 0; ++src; break; } /* FALLTHROUGH */ case '1': case '2': case '3': if (src[1] >= '0' && src[1] <= '7' && src[2] >= '0' && src[2] <= '7') { c = (src[0] - '0') << 6; c |= (src[1] - '0') << 3; c |= (src[2] - '0'); src += 3; } break; case 'a': c = '\a'; ++src; break; case 'b': c = '\b'; ++src; break; case 'f': c = '\f'; ++src; break; case 'n': c = '\n'; ++src; break; case 'r': c = '\r'; ++src; break; case 's': c = ' '; ++src; break; case 't': c = '\t'; ++src; break; case 'v': c = '\v'; ++src; break; case '\\': c = '\\'; ++src; break; } } *dest++ = c; } *dest = '\0'; } /* Parse a hex digit. */ static int parsedigit(char c) { if (c >= '0' && c <= '9') return c - '0'; else if (c >= 'a' && c <= 'f') return c - 'a'; else if (c >= 'A' && c <= 'F') return c - 'A'; else return -1; } /* * Note that this implementation does not (and should not!) obey * locale settings; you cannot simply substitute strtol here, since * it does obey locale. */ static int64_t mtree_atol(char **p, int base) { int64_t l, limit; int digit, last_digit_limit; if (base == 0) { if (**p != '0') base = 10; else if ((*p)[1] == 'x' || (*p)[1] == 'X') { *p += 2; base = 16; } else { base = 8; } } if (**p == '-') { limit = INT64_MIN / base; last_digit_limit = -(INT64_MIN % base); ++(*p); l = 0; digit = parsedigit(**p); while (digit >= 0 && digit < base) { if (l < limit || (l == limit && digit >= last_digit_limit)) return INT64_MIN; l = (l * base) - digit; digit = parsedigit(*++(*p)); } return l; } else { limit = INT64_MAX / base; last_digit_limit = INT64_MAX % base; l = 0; digit = parsedigit(**p); while (digit >= 0 && digit < base) { if (l > limit || (l == limit && digit > last_digit_limit)) return INT64_MAX; l = (l * base) + digit; digit = parsedigit(*++(*p)); } return l; } } /* * Returns length of line (including trailing newline) * or negative on error. 'start' argument is updated to * point to first character of line. */ static ssize_t readline(struct archive_read *a, struct mtree *mtree, char **start, ssize_t limit) { ssize_t bytes_read; ssize_t total_size = 0; ssize_t find_off = 0; const void *t; void *nl; char *u; /* Accumulate line in a line buffer. */ for (;;) { /* Read some more. */ t = __archive_read_ahead(a, 1, &bytes_read); if (t == NULL) return (0); if (bytes_read < 0) return (ARCHIVE_FATAL); nl = memchr(t, '\n', bytes_read); /* If we found '\n', trim the read to end exactly there. */ if (nl != NULL) { bytes_read = ((const char *)nl) - ((const char *)t) + 1; } if (total_size + bytes_read + 1 > limit) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Line too long"); return (ARCHIVE_FATAL); } if (archive_string_ensure(&mtree->line, total_size + bytes_read + 1) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate working buffer"); return (ARCHIVE_FATAL); } /* Append new bytes to string. */ memcpy(mtree->line.s + total_size, t, bytes_read); __archive_read_consume(a, bytes_read); total_size += bytes_read; mtree->line.s[total_size] = '\0'; for (u = mtree->line.s + find_off; *u; ++u) { if (u[0] == '\n') { /* Ends with unescaped newline. */ *start = mtree->line.s; return total_size; } else if (u[0] == '#') { /* Ends with comment sequence #...\n */ if (nl == NULL) { /* But we've not found the \n yet */ break; } } else if (u[0] == '\\') { if (u[1] == '\n') { /* Trim escaped newline. */ total_size -= 2; mtree->line.s[total_size] = '\0'; break; } else if (u[1] != '\0') { /* Skip the two-char escape sequence */ ++u; } } } find_off = u - mtree->line.s; } } diff --git a/contrib/libarchive/libarchive/archive_read_support_format_zip.c b/contrib/libarchive/libarchive/archive_read_support_format_zip.c index 3456c100c6f5..75df44ba93aa 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_zip.c +++ b/contrib/libarchive/libarchive/archive_read_support_format_zip.c @@ -1,4270 +1,4270 @@ /*- * Copyright (c) 2004-2013 Tim Kientzle * Copyright (c) 2011-2012,2014 Michihiro NAKAJIMA * Copyright (c) 2013 Konrad Kleine * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); /* * The definitive documentation of the Zip file format is: * http://www.pkware.com/documents/casestudies/APPNOTE.TXT * * The Info-Zip project has pioneered various extensions to better * support Zip on Unix, including the 0x5455 "UT", 0x5855 "UX", 0x7855 * "Ux", and 0x7875 "ux" extensions for time and ownership * information. * * History of this code: The streaming Zip reader was first added to * libarchive in January 2005. Support for seekable input sources was * added in Nov 2011. Zip64 support (including a significant code * refactoring) was added in 2014. */ #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_ZLIB_H #include #endif #ifdef HAVE_BZLIB_H #include #endif #ifdef HAVE_LZMA_H #include #endif #ifdef HAVE_ZSTD_H #include #endif #include "archive.h" #include "archive_digest_private.h" #include "archive_cryptor_private.h" #include "archive_endian.h" #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_hmac_private.h" #include "archive_private.h" #include "archive_rb.h" #include "archive_read_private.h" #include "archive_ppmd8_private.h" #ifndef HAVE_ZLIB_H #include "archive_crc32.h" #endif struct zip_entry { struct archive_rb_node node; struct zip_entry *next; int64_t local_header_offset; int64_t compressed_size; int64_t uncompressed_size; int64_t gid; int64_t uid; struct archive_string rsrcname; time_t mtime; time_t atime; time_t ctime; uint32_t crc32; uint16_t mode; uint16_t zip_flags; /* From GP Flags Field */ unsigned char compression; unsigned char system; /* From "version written by" */ unsigned char flags; /* Our extra markers. */ unsigned char decdat;/* Used for Decryption check */ /* WinZip AES encryption extra field should be available * when compression is 99. */ struct { /* Vendor version: AE-1 - 0x0001, AE-2 - 0x0002 */ unsigned vendor; #define AES_VENDOR_AE_1 0x0001 #define AES_VENDOR_AE_2 0x0002 /* AES encryption strength: * 1 - 128 bits, 2 - 192 bits, 2 - 256 bits. */ unsigned strength; /* Actual compression method. */ unsigned char compression; } aes_extra; }; struct trad_enc_ctx { uint32_t keys[3]; }; /* Bits used in zip_flags. */ #define ZIP_ENCRYPTED (1 << 0) #define ZIP_LENGTH_AT_END (1 << 3) #define ZIP_STRONG_ENCRYPTED (1 << 6) #define ZIP_UTF8_NAME (1 << 11) /* See "7.2 Single Password Symmetric Encryption Method" in http://www.pkware.com/documents/casestudies/APPNOTE.TXT */ #define ZIP_CENTRAL_DIRECTORY_ENCRYPTED (1 << 13) /* Bits used in flags. */ #define LA_USED_ZIP64 (1 << 0) #define LA_FROM_CENTRAL_DIRECTORY (1 << 1) /* * See "WinZip - AES Encryption Information" * http://www.winzip.com/aes_info.htm */ /* Value used in compression method. */ #define WINZIP_AES_ENCRYPTION 99 /* Authentication code size. */ #define AUTH_CODE_SIZE 10 /**/ #define MAX_DERIVED_KEY_BUF_SIZE (AES_MAX_KEY_SIZE * 2 + 2) struct zip { /* Structural information about the archive. */ struct archive_string format_name; int64_t central_directory_offset; int64_t central_directory_offset_adjusted; size_t central_directory_entries_total; size_t central_directory_entries_on_this_disk; int has_encrypted_entries; /* List of entries (seekable Zip only) */ struct zip_entry *zip_entries; struct archive_rb_tree tree; struct archive_rb_tree tree_rsrc; /* Bytes read but not yet consumed via __archive_read_consume() */ size_t unconsumed; /* Information about entry we're currently reading. */ struct zip_entry *entry; int64_t entry_bytes_remaining; /* These count the number of bytes actually read for the entry. */ int64_t entry_compressed_bytes_read; int64_t entry_uncompressed_bytes_read; /* Running CRC32 of the decompressed data */ unsigned long entry_crc32; unsigned long (*crc32func)(unsigned long, const void *, size_t); char ignore_crc32; /* Flags to mark progress of decompression. */ char decompress_init; char end_of_entry; unsigned char *uncompressed_buffer; size_t uncompressed_buffer_size; #ifdef HAVE_ZLIB_H z_stream stream; char stream_valid; #endif #if HAVE_LZMA_H && HAVE_LIBLZMA lzma_stream zipx_lzma_stream; char zipx_lzma_valid; #endif #ifdef HAVE_BZLIB_H bz_stream bzstream; char bzstream_valid; #endif #if HAVE_ZSTD_H && HAVE_LIBZSTD ZSTD_DStream *zstdstream; char zstdstream_valid; #endif IByteIn zipx_ppmd_stream; ssize_t zipx_ppmd_read_compressed; CPpmd8 ppmd8; char ppmd8_valid; char ppmd8_stream_failed; struct archive_string_conv *sconv; struct archive_string_conv *sconv_default; struct archive_string_conv *sconv_utf8; int init_default_conversion; int process_mac_extensions; char init_decryption; /* Decryption buffer. */ /* * The decrypted data starts at decrypted_ptr and * extends for decrypted_bytes_remaining. Decryption * adds new data to the end of this block, data is returned * to clients from the beginning. When the block hits the * end of decrypted_buffer, it has to be shuffled back to * the beginning of the buffer. */ unsigned char *decrypted_buffer; unsigned char *decrypted_ptr; size_t decrypted_buffer_size; size_t decrypted_bytes_remaining; size_t decrypted_unconsumed_bytes; /* Traditional PKWARE decryption. */ struct trad_enc_ctx tctx; char tctx_valid; /* WinZip AES decryption. */ /* Contexts used for AES decryption. */ archive_crypto_ctx cctx; char cctx_valid; archive_hmac_sha1_ctx hctx; char hctx_valid; /* Strong encryption's decryption header information. */ unsigned iv_size; unsigned alg_id; unsigned bit_len; unsigned flags; unsigned erd_size; unsigned v_size; unsigned v_crc32; uint8_t *iv; uint8_t *erd; uint8_t *v_data; }; /* Many systems define min or MIN, but not all. */ #define zipmin(a,b) ((a) < (b) ? (a) : (b)) #ifdef HAVE_ZLIB_H static int zip_read_data_deflate(struct archive_read *a, const void **buff, size_t *size, int64_t *offset); #endif #if HAVE_LZMA_H && HAVE_LIBLZMA static int zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, size_t *size, int64_t *offset); #endif /* This function is used by Ppmd8_DecodeSymbol during decompression of Ppmd8 * streams inside ZIP files. It has 2 purposes: one is to fetch the next * compressed byte from the stream, second one is to increase the counter how * many compressed bytes were read. */ static Byte ppmd_read(void* p) { /* Get the handle to current decompression context. */ struct archive_read *a = ((IByteIn*)p)->a; struct zip *zip = (struct zip*) a->format->data; ssize_t bytes_avail = 0; /* Fetch next byte. */ const uint8_t* data = __archive_read_ahead(a, 1, &bytes_avail); if(bytes_avail < 1) { zip->ppmd8_stream_failed = 1; return 0; } __archive_read_consume(a, 1); /* Increment the counter. */ ++zip->zipx_ppmd_read_compressed; /* Return the next compressed byte. */ return data[0]; } /* ------------------------------------------------------------------------ */ /* Traditional PKWARE Decryption functions. */ static void trad_enc_update_keys(struct trad_enc_ctx *ctx, uint8_t c) { uint8_t t; #define CRC32(c, b) (crc32(c ^ 0xffffffffUL, &b, 1) ^ 0xffffffffUL) ctx->keys[0] = CRC32(ctx->keys[0], c); ctx->keys[1] = (ctx->keys[1] + (ctx->keys[0] & 0xff)) * 134775813L + 1; t = (ctx->keys[1] >> 24) & 0xff; ctx->keys[2] = CRC32(ctx->keys[2], t); #undef CRC32 } static uint8_t trad_enc_decrypt_byte(struct trad_enc_ctx *ctx) { unsigned temp = ctx->keys[2] | 2; return (uint8_t)((temp * (temp ^ 1)) >> 8) & 0xff; } static void trad_enc_decrypt_update(struct trad_enc_ctx *ctx, const uint8_t *in, size_t in_len, uint8_t *out, size_t out_len) { unsigned i, max; max = (unsigned)((in_len < out_len)? in_len: out_len); for (i = 0; i < max; i++) { uint8_t t = in[i] ^ trad_enc_decrypt_byte(ctx); out[i] = t; trad_enc_update_keys(ctx, t); } } static int trad_enc_init(struct trad_enc_ctx *ctx, const char *pw, size_t pw_len, const uint8_t *key, size_t key_len, uint8_t *crcchk) { uint8_t header[12]; if (key_len < 12) { *crcchk = 0xff; return -1; } ctx->keys[0] = 305419896L; ctx->keys[1] = 591751049L; ctx->keys[2] = 878082192L; for (;pw_len; --pw_len) trad_enc_update_keys(ctx, *pw++); trad_enc_decrypt_update(ctx, key, 12, header, 12); /* Return the last byte for CRC check. */ *crcchk = header[11]; return 0; } #if 0 static void crypt_derive_key_sha1(const void *p, int size, unsigned char *key, int key_size) { #define MD_SIZE 20 archive_sha1_ctx ctx; unsigned char md1[MD_SIZE]; unsigned char md2[MD_SIZE * 2]; unsigned char mkb[64]; int i; archive_sha1_init(&ctx); archive_sha1_update(&ctx, p, size); archive_sha1_final(&ctx, md1); memset(mkb, 0x36, sizeof(mkb)); for (i = 0; i < MD_SIZE; i++) mkb[i] ^= md1[i]; archive_sha1_init(&ctx); archive_sha1_update(&ctx, mkb, sizeof(mkb)); archive_sha1_final(&ctx, md2); memset(mkb, 0x5C, sizeof(mkb)); for (i = 0; i < MD_SIZE; i++) mkb[i] ^= md1[i]; archive_sha1_init(&ctx); archive_sha1_update(&ctx, mkb, sizeof(mkb)); archive_sha1_final(&ctx, md2 + MD_SIZE); if (key_size > 32) key_size = 32; memcpy(key, md2, key_size); #undef MD_SIZE } #endif /* * Common code for streaming or seeking modes. * * Includes code to read local file headers, decompress data * from entry bodies, and common API. */ static unsigned long real_crc32(unsigned long crc, const void *buff, size_t len) { return crc32(crc, buff, (unsigned int)len); } /* Used by "ignorecrc32" option to speed up tests. */ static unsigned long fake_crc32(unsigned long crc, const void *buff, size_t len) { (void)crc; /* UNUSED */ (void)buff; /* UNUSED */ (void)len; /* UNUSED */ return 0; } static const struct { int id; const char * name; } compression_methods[] = { {0, "uncompressed"}, /* The file is stored (no compression) */ {1, "shrinking"}, /* The file is Shrunk */ {2, "reduced-1"}, /* The file is Reduced with compression factor 1 */ {3, "reduced-2"}, /* The file is Reduced with compression factor 2 */ {4, "reduced-3"}, /* The file is Reduced with compression factor 3 */ {5, "reduced-4"}, /* The file is Reduced with compression factor 4 */ {6, "imploded"}, /* The file is Imploded */ {7, "reserved"}, /* Reserved for Tokenizing compression algorithm */ {8, "deflation"}, /* The file is Deflated */ {9, "deflation-64-bit"}, /* Enhanced Deflating using Deflate64(tm) */ {10, "ibm-terse"},/* PKWARE Data Compression Library Imploding * (old IBM TERSE) */ {11, "reserved"}, /* Reserved by PKWARE */ {12, "bzip"}, /* File is compressed using BZIP2 algorithm */ {13, "reserved"}, /* Reserved by PKWARE */ {14, "lzma"}, /* LZMA (EFS) */ {15, "reserved"}, /* Reserved by PKWARE */ {16, "reserved"}, /* Reserved by PKWARE */ {17, "reserved"}, /* Reserved by PKWARE */ {18, "ibm-terse-new"}, /* File is compressed using IBM TERSE (new) */ {19, "ibm-lz777"},/* IBM LZ77 z Architecture (PFS) */ {93, "zstd"}, /* Zstandard (zstd) Compression */ {95, "xz"}, /* XZ compressed data */ {96, "jpeg"}, /* JPEG compressed data */ {97, "wav-pack"}, /* WavPack compressed data */ {98, "ppmd-1"}, /* PPMd version I, Rev 1 */ {99, "aes"} /* WinZip AES encryption */ }; static const char * compression_name(const int compression) { static const int num_compression_methods = sizeof(compression_methods)/sizeof(compression_methods[0]); int i=0; while(compression >= 0 && i < num_compression_methods) { if (compression_methods[i].id == compression) return compression_methods[i].name; i++; } return "??"; } /* Convert an MSDOS-style date/time into Unix-style time. */ static time_t zip_time(const char *p) { int msTime, msDate; struct tm ts; msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]); msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]); memset(&ts, 0, sizeof(ts)); ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ ts.tm_mday = msDate & 0x1f; /* Day of month. */ ts.tm_hour = (msTime >> 11) & 0x1f; ts.tm_min = (msTime >> 5) & 0x3f; ts.tm_sec = (msTime << 1) & 0x3e; ts.tm_isdst = -1; return mktime(&ts); } /* * The extra data is stored as a list of * id1+size1+data1 + id2+size2+data2 ... * triplets. id and size are 2 bytes each. */ static int process_extra(struct archive_read *a, struct archive_entry *entry, const char *p, size_t extra_length, struct zip_entry* zip_entry) { unsigned offset = 0; struct zip *zip = (struct zip *)(a->format->data); if (extra_length == 0) { return ARCHIVE_OK; } if (extra_length < 4) { size_t i = 0; /* Some ZIP files may have trailing 0 bytes. Let's check they * are all 0 and ignore them instead of returning an error. * * This is not technically correct, but some ZIP files look * like this and other tools support those files - so let's * also support them. */ for (; i < extra_length; i++) { if (p[i] != 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Too-small extra data: " "Need at least 4 bytes, " "but only found %d bytes", (int)extra_length); return ARCHIVE_FAILED; } } return ARCHIVE_OK; } while (offset <= extra_length - 4) { unsigned short headerid = archive_le16dec(p + offset); unsigned short datasize = archive_le16dec(p + offset + 2); offset += 4; if (offset + datasize > extra_length) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Extra data overflow: " "Need %d bytes but only found %d bytes", (int)datasize, (int)(extra_length - offset)); return ARCHIVE_FAILED; } #ifdef DEBUG fprintf(stderr, "Header id 0x%04x, length %d\n", headerid, datasize); #endif switch (headerid) { case 0x0001: /* Zip64 extended information extra field. */ zip_entry->flags |= LA_USED_ZIP64; if (zip_entry->uncompressed_size == 0xffffffff) { uint64_t t = 0; if (datasize < 8 || (t = archive_le64dec(p + offset)) > INT64_MAX) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Malformed 64-bit " "uncompressed size"); return ARCHIVE_FAILED; } zip_entry->uncompressed_size = t; offset += 8; datasize -= 8; } if (zip_entry->compressed_size == 0xffffffff) { uint64_t t = 0; if (datasize < 8 || (t = archive_le64dec(p + offset)) > INT64_MAX) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Malformed 64-bit " "compressed size"); return ARCHIVE_FAILED; } zip_entry->compressed_size = t; offset += 8; datasize -= 8; } if (zip_entry->local_header_offset == 0xffffffff) { uint64_t t = 0; if (datasize < 8 || (t = archive_le64dec(p + offset)) > INT64_MAX) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Malformed 64-bit " "local header offset"); return ARCHIVE_FAILED; } zip_entry->local_header_offset = t; offset += 8; datasize -= 8; } /* archive_le32dec(p + offset) gives disk * on which file starts, but we don't handle * multi-volume Zip files. */ break; #ifdef DEBUG case 0x0017: { /* Strong encryption field. */ if (archive_le16dec(p + offset) == 2) { unsigned algId = archive_le16dec(p + offset + 2); unsigned bitLen = archive_le16dec(p + offset + 4); int flags = archive_le16dec(p + offset + 6); fprintf(stderr, "algId=0x%04x, bitLen=%u, " "flgas=%d\n", algId, bitLen,flags); } break; } #endif case 0x5455: { /* Extended time field "UT". */ int flags; if (datasize == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Incomplete extended time field"); return ARCHIVE_FAILED; } flags = p[offset]; offset++; datasize--; /* Flag bits indicate which dates are present. */ if (flags & 0x01) { #ifdef DEBUG fprintf(stderr, "mtime: %lld -> %d\n", (long long)zip_entry->mtime, archive_le32dec(p + offset)); #endif if (datasize < 4) break; zip_entry->mtime = archive_le32dec(p + offset); offset += 4; datasize -= 4; } if (flags & 0x02) { if (datasize < 4) break; zip_entry->atime = archive_le32dec(p + offset); offset += 4; datasize -= 4; } if (flags & 0x04) { if (datasize < 4) break; zip_entry->ctime = archive_le32dec(p + offset); offset += 4; datasize -= 4; } break; } case 0x5855: { /* Info-ZIP Unix Extra Field (old version) "UX". */ if (datasize >= 8) { zip_entry->atime = archive_le32dec(p + offset); zip_entry->mtime = archive_le32dec(p + offset + 4); } if (datasize >= 12) { zip_entry->uid = archive_le16dec(p + offset + 8); zip_entry->gid = archive_le16dec(p + offset + 10); } break; } case 0x6c78: { /* Experimental 'xl' field */ /* * Introduced Dec 2013 to provide a way to * include external file attributes (and other * fields that ordinarily appear only in * central directory) in local file header. * This provides file type and permission * information necessary to support full * streaming extraction. Currently being * discussed with other Zip developers * ... subject to change. * * Format: * The field starts with a bitmap that specifies * which additional fields are included. The * bitmap is variable length and can be extended in * the future. * * n bytes - feature bitmap: first byte has low-order * 7 bits. If high-order bit is set, a subsequent * byte holds the next 7 bits, etc. * * if bitmap & 1, 2 byte "version made by" * if bitmap & 2, 2 byte "internal file attributes" * if bitmap & 4, 4 byte "external file attributes" * if bitmap & 8, 2 byte comment length + n byte * comment */ int bitmap, bitmap_last; if (datasize < 1) break; bitmap_last = bitmap = 0xff & p[offset]; offset += 1; datasize -= 1; /* We only support first 7 bits of bitmap; skip rest. */ while ((bitmap_last & 0x80) != 0 && datasize >= 1) { bitmap_last = p[offset]; offset += 1; datasize -= 1; } if (bitmap & 1) { /* 2 byte "version made by" */ if (datasize < 2) break; zip_entry->system = archive_le16dec(p + offset) >> 8; offset += 2; datasize -= 2; } if (bitmap & 2) { /* 2 byte "internal file attributes" */ uint32_t internal_attributes; if (datasize < 2) break; internal_attributes = archive_le16dec(p + offset); /* Not used by libarchive at present. */ (void)internal_attributes; /* UNUSED */ offset += 2; datasize -= 2; } if (bitmap & 4) { /* 4 byte "external file attributes" */ uint32_t external_attributes; if (datasize < 4) break; external_attributes = archive_le32dec(p + offset); if (zip_entry->system == 3) { zip_entry->mode = external_attributes >> 16; } else if (zip_entry->system == 0) { // Interpret MSDOS directory bit if (0x10 == (external_attributes & 0x10)) { zip_entry->mode = AE_IFDIR | 0775; } else { zip_entry->mode = AE_IFREG | 0664; } if (0x01 == (external_attributes & 0x01)) { /* Read-only bit; * strip write permissions */ zip_entry->mode &= 0555; } } else { zip_entry->mode = 0; } offset += 4; datasize -= 4; } if (bitmap & 8) { /* 2 byte comment length + comment */ uint32_t comment_length; if (datasize < 2) break; comment_length = archive_le16dec(p + offset); offset += 2; datasize -= 2; if (datasize < comment_length) break; /* Comment is not supported by libarchive */ offset += comment_length; datasize -= comment_length; } break; } case 0x7075: { /* Info-ZIP Unicode Path Extra Field. */ if (datasize < 5 || entry == NULL) break; offset += 5; datasize -= 5; /* The path name in this field is always encoded * in UTF-8. */ if (zip->sconv_utf8 == NULL) { zip->sconv_utf8 = archive_string_conversion_from_charset( &a->archive, "UTF-8", 1); /* If the converter from UTF-8 is not * available, then the path name from the main * field will more likely be correct. */ if (zip->sconv_utf8 == NULL) break; } /* Make sure the CRC32 of the filename matches. */ if (!zip->ignore_crc32) { const char *cp = archive_entry_pathname(entry); if (cp) { unsigned long file_crc = zip->crc32func(0, cp, strlen(cp)); unsigned long utf_crc = archive_le32dec(p + offset - 4); if (file_crc != utf_crc) { #ifdef DEBUG fprintf(stderr, "CRC filename mismatch; " "CDE is %lx, but UTF8 " "is outdated with %lx\n", file_crc, utf_crc); #endif break; } } } if (archive_entry_copy_pathname_l(entry, p + offset, datasize, zip->sconv_utf8) != 0) { /* Ignore the error, and fallback to the path * name from the main field. */ #ifdef DEBUG fprintf(stderr, "Failed to read the ZIP " "0x7075 extra field path.\n"); #endif } break; } case 0x7855: /* Info-ZIP Unix Extra Field (type 2) "Ux". */ #ifdef DEBUG fprintf(stderr, "uid %d gid %d\n", archive_le16dec(p + offset), archive_le16dec(p + offset + 2)); #endif if (datasize >= 2) zip_entry->uid = archive_le16dec(p + offset); if (datasize >= 4) zip_entry->gid = archive_le16dec(p + offset + 2); break; case 0x7875: { /* Info-Zip Unix Extra Field (type 3) "ux". */ int uidsize = 0, gidsize = 0; /* TODO: support arbitrary uidsize/gidsize. */ if (datasize >= 1 && p[offset] == 1) {/* version=1 */ if (datasize >= 4) { /* get a uid size. */ uidsize = 0xff & (int)p[offset+1]; if (uidsize == 2) zip_entry->uid = archive_le16dec( p + offset + 2); else if (uidsize == 4 && datasize >= 6) zip_entry->uid = archive_le32dec( p + offset + 2); } if (datasize >= (2 + uidsize + 3)) { /* get a gid size. */ gidsize = 0xff & (int)p[offset+2+uidsize]; if (gidsize == 2) zip_entry->gid = archive_le16dec( p+offset+2+uidsize+1); else if (gidsize == 4 && datasize >= (2 + uidsize + 5)) zip_entry->gid = archive_le32dec( p+offset+2+uidsize+1); } } break; } case 0x9901: /* WinZip AES extra data field. */ if (datasize < 6) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Incomplete AES field"); return ARCHIVE_FAILED; } if (p[offset + 2] == 'A' && p[offset + 3] == 'E') { /* Vendor version. */ zip_entry->aes_extra.vendor = archive_le16dec(p + offset); /* AES encryption strength. */ zip_entry->aes_extra.strength = p[offset + 4]; /* Actual compression method. */ zip_entry->aes_extra.compression = p[offset + 5]; } break; default: break; } offset += datasize; } return ARCHIVE_OK; } /* * Assumes file pointer is at beginning of local file header. */ static int zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, struct zip *zip) { const char *p; const void *h; const wchar_t *wp; const char *cp; size_t len, filename_length, extra_length; struct archive_string_conv *sconv; struct zip_entry *zip_entry = zip->entry; struct zip_entry zip_entry_central_dir; int ret = ARCHIVE_OK; char version; /* Save a copy of the original for consistency checks. */ zip_entry_central_dir = *zip_entry; zip->decompress_init = 0; zip->end_of_entry = 0; zip->entry_uncompressed_bytes_read = 0; zip->entry_compressed_bytes_read = 0; zip->entry_crc32 = zip->crc32func(0, NULL, 0); /* Setup default conversion. */ if (zip->sconv == NULL && !zip->init_default_conversion) { zip->sconv_default = archive_string_default_conversion_for_read(&(a->archive)); zip->init_default_conversion = 1; } if ((p = __archive_read_ahead(a, 30, NULL)) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } if (memcmp(p, "PK\003\004", 4) != 0) { archive_set_error(&a->archive, -1, "Damaged Zip archive"); return ARCHIVE_FATAL; } version = p[4]; zip_entry->system = p[5]; zip_entry->zip_flags = archive_le16dec(p + 6); if (zip_entry->zip_flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)) { zip->has_encrypted_entries = 1; archive_entry_set_is_data_encrypted(entry, 1); if (zip_entry->zip_flags & ZIP_CENTRAL_DIRECTORY_ENCRYPTED && zip_entry->zip_flags & ZIP_ENCRYPTED && zip_entry->zip_flags & ZIP_STRONG_ENCRYPTED) { archive_entry_set_is_metadata_encrypted(entry, 1); return ARCHIVE_FATAL; } } zip->init_decryption = (zip_entry->zip_flags & ZIP_ENCRYPTED); zip_entry->compression = (char)archive_le16dec(p + 8); zip_entry->mtime = zip_time(p + 10); zip_entry->crc32 = archive_le32dec(p + 14); if (zip_entry->zip_flags & ZIP_LENGTH_AT_END) zip_entry->decdat = p[11]; else zip_entry->decdat = p[17]; zip_entry->compressed_size = archive_le32dec(p + 18); zip_entry->uncompressed_size = archive_le32dec(p + 22); filename_length = archive_le16dec(p + 26); extra_length = archive_le16dec(p + 28); __archive_read_consume(a, 30); /* Read the filename. */ if ((h = __archive_read_ahead(a, filename_length, NULL)) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } if (zip_entry->zip_flags & ZIP_UTF8_NAME) { /* The filename is stored to be UTF-8. */ if (zip->sconv_utf8 == NULL) { zip->sconv_utf8 = archive_string_conversion_from_charset( &a->archive, "UTF-8", 1); if (zip->sconv_utf8 == NULL) return (ARCHIVE_FATAL); } sconv = zip->sconv_utf8; } else if (zip->sconv != NULL) sconv = zip->sconv; else sconv = zip->sconv_default; if (archive_entry_copy_pathname_l(entry, h, filename_length, sconv) != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Pathname"); return (ARCHIVE_FATAL); } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Pathname cannot be converted " "from %s to current locale.", archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; } __archive_read_consume(a, filename_length); /* Read the extra data. */ if ((h = __archive_read_ahead(a, extra_length, NULL)) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } if (ARCHIVE_OK != process_extra(a, entry, h, extra_length, zip_entry)) { return ARCHIVE_FATAL; } __archive_read_consume(a, extra_length); /* Work around a bug in Info-Zip: When reading from a pipe, it * stats the pipe instead of synthesizing a file entry. */ if ((zip_entry->mode & AE_IFMT) == AE_IFIFO) { zip_entry->mode &= ~ AE_IFMT; zip_entry->mode |= AE_IFREG; } /* If the mode is totally empty, set some sane default. */ if (zip_entry->mode == 0) { zip_entry->mode |= 0664; } /* Windows archivers sometimes use backslash as the directory * separator. Normalize to slash. */ if (zip_entry->system == 0 && (wp = archive_entry_pathname_w(entry)) != NULL) { if (wcschr(wp, L'/') == NULL && wcschr(wp, L'\\') != NULL) { size_t i; struct archive_wstring s; archive_string_init(&s); archive_wstrcpy(&s, wp); for (i = 0; i < archive_strlen(&s); i++) { if (s.s[i] == '\\') s.s[i] = '/'; } archive_entry_copy_pathname_w(entry, s.s); archive_wstring_free(&s); } } /* Make sure that entries with a trailing '/' are marked as directories * even if the External File Attributes contains bogus values. If this * is not a directory and there is no type, assume a regular file. */ if ((zip_entry->mode & AE_IFMT) != AE_IFDIR) { int has_slash; wp = archive_entry_pathname_w(entry); if (wp != NULL) { len = wcslen(wp); has_slash = len > 0 && wp[len - 1] == L'/'; } else { cp = archive_entry_pathname(entry); len = (cp != NULL)?strlen(cp):0; has_slash = len > 0 && cp[len - 1] == '/'; } /* Correct file type as needed. */ if (has_slash) { zip_entry->mode &= ~AE_IFMT; zip_entry->mode |= AE_IFDIR; zip_entry->mode |= 0111; } else if ((zip_entry->mode & AE_IFMT) == 0) { zip_entry->mode |= AE_IFREG; } } /* Make sure directories end in '/' */ if ((zip_entry->mode & AE_IFMT) == AE_IFDIR) { wp = archive_entry_pathname_w(entry); if (wp != NULL) { len = wcslen(wp); if (len > 0 && wp[len - 1] != L'/') { struct archive_wstring s; archive_string_init(&s); archive_wstrcat(&s, wp); archive_wstrappend_wchar(&s, L'/'); archive_entry_copy_pathname_w(entry, s.s); archive_wstring_free(&s); } } else { cp = archive_entry_pathname(entry); len = (cp != NULL)?strlen(cp):0; if (len > 0 && cp[len - 1] != '/') { struct archive_string s; archive_string_init(&s); archive_strcat(&s, cp); archive_strappend_char(&s, '/'); archive_entry_set_pathname(entry, s.s); archive_string_free(&s); } } } if (zip_entry->flags & LA_FROM_CENTRAL_DIRECTORY) { /* If this came from the central dir, its size info * is definitive, so ignore the length-at-end flag. */ zip_entry->zip_flags &= ~ZIP_LENGTH_AT_END; /* If local header is missing a value, use the one from the central directory. If both have it, warn about mismatches. */ if (zip_entry->crc32 == 0) { zip_entry->crc32 = zip_entry_central_dir.crc32; } else if (!zip->ignore_crc32 && zip_entry->crc32 != zip_entry_central_dir.crc32) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Inconsistent CRC32 values"); ret = ARCHIVE_WARN; } if (zip_entry->compressed_size == 0) { zip_entry->compressed_size = zip_entry_central_dir.compressed_size; } else if (zip_entry->compressed_size != zip_entry_central_dir.compressed_size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Inconsistent compressed size: " "%jd in central directory, %jd in local header", (intmax_t)zip_entry_central_dir.compressed_size, (intmax_t)zip_entry->compressed_size); ret = ARCHIVE_WARN; } if (zip_entry->uncompressed_size == 0 || zip_entry->uncompressed_size == 0xffffffff) { zip_entry->uncompressed_size = zip_entry_central_dir.uncompressed_size; } else if (zip_entry->uncompressed_size != zip_entry_central_dir.uncompressed_size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Inconsistent uncompressed size: " "%jd in central directory, %jd in local header", (intmax_t)zip_entry_central_dir.uncompressed_size, (intmax_t)zip_entry->uncompressed_size); ret = ARCHIVE_WARN; } } /* Populate some additional entry fields: */ archive_entry_set_mode(entry, zip_entry->mode); archive_entry_set_uid(entry, zip_entry->uid); archive_entry_set_gid(entry, zip_entry->gid); archive_entry_set_mtime(entry, zip_entry->mtime, 0); archive_entry_set_ctime(entry, zip_entry->ctime, 0); archive_entry_set_atime(entry, zip_entry->atime, 0); if ((zip->entry->mode & AE_IFMT) == AE_IFLNK) { size_t linkname_length; if (zip_entry->compressed_size > 64 * 1024) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Zip file with oversized link entry"); return ARCHIVE_FATAL; } linkname_length = (size_t)zip_entry->compressed_size; archive_entry_set_size(entry, 0); // take into account link compression if any size_t linkname_full_length = linkname_length; if (zip->entry->compression != 0) { // symlink target string appeared to be compressed int status = ARCHIVE_FATAL; const void *uncompressed_buffer = NULL; switch (zip->entry->compression) { #if HAVE_ZLIB_H case 8: /* Deflate compression. */ zip->entry_bytes_remaining = zip_entry->compressed_size; status = zip_read_data_deflate(a, &uncompressed_buffer, &linkname_full_length, NULL); break; #endif #if HAVE_LZMA_H && HAVE_LIBLZMA case 14: /* ZIPx LZMA compression. */ /*(see zip file format specification, section 4.4.5)*/ zip->entry_bytes_remaining = zip_entry->compressed_size; status = zip_read_data_zipx_lzma_alone(a, &uncompressed_buffer, &linkname_full_length, NULL); break; #endif default: /* Unsupported compression. */ break; } if (status == ARCHIVE_OK) { p = uncompressed_buffer; } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported ZIP compression method " "during decompression of link entry (%d: %s)", zip->entry->compression, compression_name(zip->entry->compression)); return ARCHIVE_FAILED; } } else { p = __archive_read_ahead(a, linkname_length, NULL); } if (p == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Truncated Zip file"); return ARCHIVE_FATAL; } sconv = zip->sconv; if (sconv == NULL && (zip->entry->zip_flags & ZIP_UTF8_NAME)) sconv = zip->sconv_utf8; if (sconv == NULL) sconv = zip->sconv_default; if (archive_entry_copy_symlink_l(entry, p, linkname_full_length, sconv) != 0) { if (errno != ENOMEM && sconv == zip->sconv_utf8 && (zip->entry->zip_flags & ZIP_UTF8_NAME)) archive_entry_copy_symlink_l(entry, p, linkname_full_length, NULL); if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Symlink"); return (ARCHIVE_FATAL); } /* * Since there is no character-set regulation for * symlink name, do not report the conversion error * in an automatic conversion. */ if (sconv != zip->sconv_utf8 || (zip->entry->zip_flags & ZIP_UTF8_NAME) == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Symlink cannot be converted " "from %s to current locale.", archive_string_conversion_charset_name( sconv)); ret = ARCHIVE_WARN; } } zip_entry->uncompressed_size = zip_entry->compressed_size = 0; if (__archive_read_consume(a, linkname_length) < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Read error skipping symlink target name"); return ARCHIVE_FATAL; } } else if (0 == (zip_entry->zip_flags & ZIP_LENGTH_AT_END) || zip_entry->uncompressed_size > 0) { /* Set the size only if it's meaningful. */ archive_entry_set_size(entry, zip_entry->uncompressed_size); } zip->entry_bytes_remaining = zip_entry->compressed_size; /* If there's no body, force read_data() to return EOF immediately. */ if (0 == (zip_entry->zip_flags & ZIP_LENGTH_AT_END) && zip->entry_bytes_remaining < 1) zip->end_of_entry = 1; /* Set up a more descriptive format name. */ archive_string_empty(&zip->format_name); archive_string_sprintf(&zip->format_name, "ZIP %d.%d (%s)", version / 10, version % 10, compression_name(zip->entry->compression)); a->archive.archive_format_name = zip->format_name.s; return (ret); } static int check_authentication_code(struct archive_read *a, const void *_p) { struct zip *zip = (struct zip *)(a->format->data); /* Check authentication code. */ if (zip->hctx_valid) { const void *p; uint8_t hmac[20]; size_t hmac_len = 20; int cmp; archive_hmac_sha1_final(&zip->hctx, hmac, &hmac_len); if (_p == NULL) { /* Read authentication code. */ p = __archive_read_ahead(a, AUTH_CODE_SIZE, NULL); if (p == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } } else { p = _p; } cmp = memcmp(hmac, p, AUTH_CODE_SIZE); __archive_read_consume(a, AUTH_CODE_SIZE); if (cmp != 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "ZIP bad Authentication code"); return (ARCHIVE_WARN); } } return (ARCHIVE_OK); } /* * Read "uncompressed" data. There are three cases: * 1) We know the size of the data. This is always true for the * seeking reader (we've examined the Central Directory already). * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred. * Info-ZIP seems to do this; we know the size but have to grab * the CRC from the data descriptor afterwards. * 3) We're streaming and ZIP_LENGTH_AT_END was specified and * we have no size information. In this case, we can do pretty * well by watching for the data descriptor record. The data * descriptor is 16 bytes and includes a computed CRC that should * provide a strong check. * * TODO: Technically, the PK\007\010 signature is optional. * In the original spec, the data descriptor contained CRC * and size fields but had no leading signature. In practice, * newer writers seem to provide the signature pretty consistently. * * For uncompressed data, the PK\007\010 marker seems essential * to be sure we've actually seen the end of the entry. * * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets * zip->end_of_entry if it consumes all of the data. */ static int zip_read_data_none(struct archive_read *a, const void **_buff, size_t *size, int64_t *offset) { struct zip *zip; const char *buff; ssize_t bytes_avail; int r; (void)offset; /* UNUSED */ zip = (struct zip *)(a->format->data); if (zip->entry->zip_flags & ZIP_LENGTH_AT_END) { const char *p; ssize_t grabbing_bytes = 24; if (zip->hctx_valid) grabbing_bytes += AUTH_CODE_SIZE; /* Grab at least 24 bytes. */ buff = __archive_read_ahead(a, grabbing_bytes, &bytes_avail); if (bytes_avail < grabbing_bytes) { /* Zip archives have end-of-archive markers that are longer than this, so a failure to get at least 24 bytes really does indicate a truncated file. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } /* Check for a complete PK\007\010 signature, followed * by the correct 4-byte CRC. */ p = buff; if (zip->hctx_valid) p += AUTH_CODE_SIZE; if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010' && (archive_le32dec(p + 4) == zip->entry_crc32 || zip->ignore_crc32 || (zip->hctx_valid && zip->entry->aes_extra.vendor == AES_VENDOR_AE_2))) { if (zip->entry->flags & LA_USED_ZIP64) { uint64_t compressed, uncompressed; zip->entry->crc32 = archive_le32dec(p + 4); compressed = archive_le64dec(p + 8); uncompressed = archive_le64dec(p + 16); if (compressed > INT64_MAX || uncompressed > INT64_MAX) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Overflow of 64-bit file sizes"); return ARCHIVE_FAILED; } zip->entry->compressed_size = compressed; zip->entry->uncompressed_size = uncompressed; zip->unconsumed = 24; } else { zip->entry->crc32 = archive_le32dec(p + 4); zip->entry->compressed_size = archive_le32dec(p + 8); zip->entry->uncompressed_size = archive_le32dec(p + 12); zip->unconsumed = 16; } if (zip->hctx_valid) { r = check_authentication_code(a, buff); if (r != ARCHIVE_OK) return (r); } zip->end_of_entry = 1; return (ARCHIVE_OK); } /* If not at EOF, ensure we consume at least one byte. */ ++p; /* Scan forward until we see where a PK\007\010 signature * might be. */ /* Return bytes up until that point. On the next call, * the code above will verify the data descriptor. */ while (p < buff + bytes_avail - 4) { if (p[3] == 'P') { p += 3; } else if (p[3] == 'K') { p += 2; } else if (p[3] == '\007') { p += 1; } else if (p[3] == '\010' && p[2] == '\007' && p[1] == 'K' && p[0] == 'P') { if (zip->hctx_valid) p -= AUTH_CODE_SIZE; break; } else { p += 4; } } bytes_avail = p - buff; } else { if (zip->entry_bytes_remaining == 0) { zip->end_of_entry = 1; if (zip->hctx_valid) { r = check_authentication_code(a, NULL); if (r != ARCHIVE_OK) return (r); } return (ARCHIVE_OK); } /* Grab a bunch of bytes. */ buff = __archive_read_ahead(a, 1, &bytes_avail); if (bytes_avail <= 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } if (bytes_avail > zip->entry_bytes_remaining) bytes_avail = (ssize_t)zip->entry_bytes_remaining; } if (zip->tctx_valid || zip->cctx_valid) { size_t dec_size = bytes_avail; if (dec_size > zip->decrypted_buffer_size) dec_size = zip->decrypted_buffer_size; if (zip->tctx_valid) { trad_enc_decrypt_update(&zip->tctx, (const uint8_t *)buff, dec_size, zip->decrypted_buffer, dec_size); } else { size_t dsize = dec_size; archive_hmac_sha1_update(&zip->hctx, (const uint8_t *)buff, dec_size); archive_decrypto_aes_ctr_update(&zip->cctx, (const uint8_t *)buff, dec_size, zip->decrypted_buffer, &dsize); } bytes_avail = dec_size; buff = (const char *)zip->decrypted_buffer; } *size = bytes_avail; zip->entry_bytes_remaining -= bytes_avail; zip->entry_uncompressed_bytes_read += bytes_avail; zip->entry_compressed_bytes_read += bytes_avail; zip->unconsumed += bytes_avail; *_buff = buff; return (ARCHIVE_OK); } static int consume_optional_marker(struct archive_read *a, struct zip *zip) { if (zip->end_of_entry && (zip->entry->zip_flags & ZIP_LENGTH_AT_END)) { const char *p; if (NULL == (p = __archive_read_ahead(a, 24, NULL))) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP end-of-file record"); return (ARCHIVE_FATAL); } /* Consume the optional PK\007\010 marker. */ if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010') { p += 4; zip->unconsumed = 4; } if (zip->entry->flags & LA_USED_ZIP64) { uint64_t compressed, uncompressed; zip->entry->crc32 = archive_le32dec(p); compressed = archive_le64dec(p + 4); uncompressed = archive_le64dec(p + 12); if (compressed > INT64_MAX || uncompressed > INT64_MAX) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Overflow of 64-bit file sizes"); return ARCHIVE_FAILED; } zip->entry->compressed_size = compressed; zip->entry->uncompressed_size = uncompressed; zip->unconsumed += 20; } else { zip->entry->crc32 = archive_le32dec(p); zip->entry->compressed_size = archive_le32dec(p + 4); zip->entry->uncompressed_size = archive_le32dec(p + 8); zip->unconsumed += 12; } } return (ARCHIVE_OK); } #if HAVE_LZMA_H && HAVE_LIBLZMA static int zipx_xz_init(struct archive_read *a, struct zip *zip) { lzma_ret r; if(zip->zipx_lzma_valid) { lzma_end(&zip->zipx_lzma_stream); zip->zipx_lzma_valid = 0; } memset(&zip->zipx_lzma_stream, 0, sizeof(zip->zipx_lzma_stream)); r = lzma_stream_decoder(&zip->zipx_lzma_stream, UINT64_MAX, 0); if (r != LZMA_OK) { archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, "xz initialization failed(%d)", r); return (ARCHIVE_FAILED); } zip->zipx_lzma_valid = 1; free(zip->uncompressed_buffer); zip->uncompressed_buffer_size = 256 * 1024; zip->uncompressed_buffer = (uint8_t*) malloc(zip->uncompressed_buffer_size); if (zip->uncompressed_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for xz decompression"); return (ARCHIVE_FATAL); } zip->decompress_init = 1; return (ARCHIVE_OK); } static int zipx_lzma_alone_init(struct archive_read *a, struct zip *zip) { lzma_ret r; const uint8_t* p; #pragma pack(push) #pragma pack(1) struct _alone_header { uint8_t bytes[5]; uint64_t uncompressed_size; } alone_header; #pragma pack(pop) if(zip->zipx_lzma_valid) { lzma_end(&zip->zipx_lzma_stream); zip->zipx_lzma_valid = 0; } /* To unpack ZIPX's "LZMA" (id 14) stream we can use standard liblzma * that is a part of XZ Utils. The stream format stored inside ZIPX * file is a modified "lzma alone" file format, that was used by the * `lzma` utility which was later deprecated in favour of `xz` utility. * Since those formats are nearly the same, we can use a standard * "lzma alone" decoder from XZ Utils. */ memset(&zip->zipx_lzma_stream, 0, sizeof(zip->zipx_lzma_stream)); r = lzma_alone_decoder(&zip->zipx_lzma_stream, UINT64_MAX); if (r != LZMA_OK) { archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, "lzma initialization failed(%d)", r); return (ARCHIVE_FAILED); } /* Flag the cleanup function that we want our lzma-related structures * to be freed later. */ zip->zipx_lzma_valid = 1; /* The "lzma alone" file format and the stream format inside ZIPx are * almost the same. Here's an example of a structure of "lzma alone" * format: * * $ cat /bin/ls | lzma | xxd | head -n 1 * 00000000: 5d00 0080 00ff ffff ffff ffff ff00 2814 * * 5 bytes 8 bytes n bytes * * * lzma_params is a 5-byte blob that has to be decoded to extract * parameters of this LZMA stream. The uncompressed_size field is an * uint64_t value that contains information about the size of the * uncompressed file, or UINT64_MAX if this value is unknown. * The part is the actual lzma-compressed data stream. * * Now here's the structure of the stream inside the ZIPX file: * * $ cat stream_inside_zipx | xxd | head -n 1 * 00000000: 0914 0500 5d00 8000 0000 2814 .... .... * * 2byte 2byte 5 bytes n bytes * * * This means that the ZIPX file contains an additional magic1 and * magic2 headers, the lzma_params field contains the same parameter * set as in the "lzma alone" format, and the field is the * same as in the "lzma alone" format as well. Note that also the zipx * format is missing the uncompressed_size field. * * So, in order to use the "lzma alone" decoder for the zipx lzma * stream, we simply need to shuffle around some fields, prepare a new * lzma alone header, feed it into lzma alone decoder so it will * initialize itself properly, and then we can start feeding normal * zipx lzma stream into the decoder. */ /* Read magic1,magic2,lzma_params from the ZIPX stream. */ - if((p = __archive_read_ahead(a, 9, NULL)) == NULL) { + if(zip->entry_bytes_remaining < 9 || (p = __archive_read_ahead(a, 9, NULL)) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated lzma data"); return (ARCHIVE_FATAL); } if(p[2] != 0x05 || p[3] != 0x00) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid lzma data"); return (ARCHIVE_FATAL); } /* Prepare an lzma alone header: copy the lzma_params blob into * a proper place into the lzma alone header. */ memcpy(&alone_header.bytes[0], p + 4, 5); /* Initialize the 'uncompressed size' field to unknown; we'll manually * monitor how many bytes there are still to be uncompressed. */ alone_header.uncompressed_size = UINT64_MAX; if(!zip->uncompressed_buffer) { zip->uncompressed_buffer_size = 256 * 1024; zip->uncompressed_buffer = (uint8_t*) malloc(zip->uncompressed_buffer_size); if (zip->uncompressed_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for lzma decompression"); return (ARCHIVE_FATAL); } } zip->zipx_lzma_stream.next_in = (void*) &alone_header; zip->zipx_lzma_stream.avail_in = sizeof(alone_header); zip->zipx_lzma_stream.total_in = 0; zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; zip->zipx_lzma_stream.avail_out = zip->uncompressed_buffer_size; zip->zipx_lzma_stream.total_out = 0; /* Feed only the header into the lzma alone decoder. This will * effectively initialize the decoder, and will not produce any * output bytes yet. */ r = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); if (r != LZMA_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "lzma stream initialization error"); return ARCHIVE_FATAL; } /* We've already consumed some bytes, so take this into account. */ __archive_read_consume(a, 9); zip->entry_bytes_remaining -= 9; zip->entry_compressed_bytes_read += 9; zip->decompress_init = 1; return (ARCHIVE_OK); } static int zip_read_data_zipx_xz(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct zip* zip = (struct zip *)(a->format->data); int ret; lzma_ret lz_ret; const void* compressed_buf; ssize_t bytes_avail, in_bytes, to_consume = 0; (void) offset; /* UNUSED */ /* Initialize decompressor if not yet initialized. */ if (!zip->decompress_init) { ret = zipx_xz_init(a, zip); if (ret != ARCHIVE_OK) return (ret); } compressed_buf = __archive_read_ahead(a, 1, &bytes_avail); if (bytes_avail < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated xz file body"); return (ARCHIVE_FATAL); } in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); zip->zipx_lzma_stream.next_in = compressed_buf; zip->zipx_lzma_stream.avail_in = in_bytes; zip->zipx_lzma_stream.total_in = 0; zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; zip->zipx_lzma_stream.avail_out = zip->uncompressed_buffer_size; zip->zipx_lzma_stream.total_out = 0; /* Perform the decompression. */ lz_ret = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); switch(lz_ret) { case LZMA_DATA_ERROR: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "xz data error (error %d)", (int) lz_ret); return (ARCHIVE_FATAL); case LZMA_NO_CHECK: case LZMA_OK: break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "xz unknown error %d", (int) lz_ret); return (ARCHIVE_FATAL); case LZMA_STREAM_END: lzma_end(&zip->zipx_lzma_stream); zip->zipx_lzma_valid = 0; if((int64_t) zip->zipx_lzma_stream.total_in != zip->entry_bytes_remaining) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "xz premature end of stream"); return (ARCHIVE_FATAL); } zip->end_of_entry = 1; break; } to_consume = zip->zipx_lzma_stream.total_in; __archive_read_consume(a, to_consume); zip->entry_bytes_remaining -= to_consume; zip->entry_compressed_bytes_read += to_consume; zip->entry_uncompressed_bytes_read += zip->zipx_lzma_stream.total_out; *size = zip->zipx_lzma_stream.total_out; *buff = zip->uncompressed_buffer; ret = consume_optional_marker(a, zip); if (ret != ARCHIVE_OK) return (ret); return (ARCHIVE_OK); } static int zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct zip* zip = (struct zip *)(a->format->data); int ret; lzma_ret lz_ret; const void* compressed_buf; ssize_t bytes_avail, in_bytes, to_consume; (void) offset; /* UNUSED */ /* Initialize decompressor if not yet initialized. */ if (!zip->decompress_init) { ret = zipx_lzma_alone_init(a, zip); if (ret != ARCHIVE_OK) return (ret); } /* Fetch more compressed data. The same note as in deflate handler * applies here as well: * * Note: '1' here is a performance optimization. Recall that the * decompression layer returns a count of available bytes; asking for * more than that forces the decompressor to combine reads by copying * data. */ compressed_buf = __archive_read_ahead(a, 1, &bytes_avail); if (bytes_avail < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated lzma file body"); return (ARCHIVE_FATAL); } /* Set decompressor parameters. */ in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); zip->zipx_lzma_stream.next_in = compressed_buf; zip->zipx_lzma_stream.avail_in = in_bytes; zip->zipx_lzma_stream.total_in = 0; zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; zip->zipx_lzma_stream.avail_out = /* These lzma_alone streams lack end of stream marker, so let's * make sure the unpacker won't try to unpack more than it's * supposed to. */ zipmin((int64_t) zip->uncompressed_buffer_size, zip->entry->uncompressed_size - zip->entry_uncompressed_bytes_read); zip->zipx_lzma_stream.total_out = 0; /* Perform the decompression. */ lz_ret = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); switch(lz_ret) { case LZMA_DATA_ERROR: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "lzma data error (error %d)", (int) lz_ret); return (ARCHIVE_FATAL); /* This case is optional in lzma alone format. It can happen, * but most of the files don't have it. (GitHub #1257) */ case LZMA_STREAM_END: lzma_end(&zip->zipx_lzma_stream); zip->zipx_lzma_valid = 0; if((int64_t) zip->zipx_lzma_stream.total_in != zip->entry_bytes_remaining) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "lzma alone premature end of stream"); return (ARCHIVE_FATAL); } zip->end_of_entry = 1; break; case LZMA_OK: break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "lzma unknown error %d", (int) lz_ret); return (ARCHIVE_FATAL); } to_consume = zip->zipx_lzma_stream.total_in; /* Update pointers. */ __archive_read_consume(a, to_consume); zip->entry_bytes_remaining -= to_consume; zip->entry_compressed_bytes_read += to_consume; zip->entry_uncompressed_bytes_read += zip->zipx_lzma_stream.total_out; if(zip->entry_bytes_remaining == 0) { zip->end_of_entry = 1; } /* Return values. */ *size = zip->zipx_lzma_stream.total_out; *buff = zip->uncompressed_buffer; /* Behave the same way as during deflate decompression. */ ret = consume_optional_marker(a, zip); if (ret != ARCHIVE_OK) return (ret); /* Free lzma decoder handle because we'll no longer need it. */ if(zip->end_of_entry) { lzma_end(&zip->zipx_lzma_stream); zip->zipx_lzma_valid = 0; } /* If we're here, then we're good! */ return (ARCHIVE_OK); } #endif /* HAVE_LZMA_H && HAVE_LIBLZMA */ static int zipx_ppmd8_init(struct archive_read *a, struct zip *zip) { const void* p; uint32_t val; uint32_t order; uint32_t mem; uint32_t restore_method; /* Remove previous decompression context if it exists. */ if(zip->ppmd8_valid) { __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); zip->ppmd8_valid = 0; } /* Create a new decompression context. */ __archive_ppmd8_functions.Ppmd8_Construct(&zip->ppmd8); zip->ppmd8_stream_failed = 0; /* Setup function pointers required by Ppmd8 decompressor. The * 'ppmd_read' function will feed new bytes to the decompressor, * and will increment the 'zip->zipx_ppmd_read_compressed' counter. */ zip->ppmd8.Stream.In = &zip->zipx_ppmd_stream; zip->zipx_ppmd_stream.a = a; zip->zipx_ppmd_stream.Read = &ppmd_read; /* Reset number of read bytes to 0. */ zip->zipx_ppmd_read_compressed = 0; /* Read Ppmd8 header (2 bytes). */ p = __archive_read_ahead(a, 2, NULL); if(!p) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated file data in PPMd8 stream"); return (ARCHIVE_FATAL); } __archive_read_consume(a, 2); /* Decode the stream's compression parameters. */ val = archive_le16dec(p); order = (val & 15) + 1; mem = ((val >> 4) & 0xff) + 1; restore_method = (val >> 12); if(order < 2 || restore_method > 2) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid parameter set in PPMd8 stream (order=%" PRId32 ", " "restore=%" PRId32 ")", order, restore_method); return (ARCHIVE_FAILED); } /* Allocate the memory needed to properly decompress the file. */ if(!__archive_ppmd8_functions.Ppmd8_Alloc(&zip->ppmd8, mem << 20)) { archive_set_error(&a->archive, ENOMEM, "Unable to allocate memory for PPMd8 stream: %" PRId32 " bytes", mem << 20); return (ARCHIVE_FATAL); } /* Signal the cleanup function to release Ppmd8 context in the * cleanup phase. */ zip->ppmd8_valid = 1; /* Perform further Ppmd8 initialization. */ if(!__archive_ppmd8_functions.Ppmd8_RangeDec_Init(&zip->ppmd8)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "PPMd8 stream range decoder initialization error"); return (ARCHIVE_FATAL); } __archive_ppmd8_functions.Ppmd8_Init(&zip->ppmd8, order, restore_method); /* Allocate the buffer that will hold uncompressed data. */ free(zip->uncompressed_buffer); zip->uncompressed_buffer_size = 256 * 1024; zip->uncompressed_buffer = (uint8_t*) malloc(zip->uncompressed_buffer_size); if(zip->uncompressed_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for PPMd8 decompression"); return ARCHIVE_FATAL; } /* Ppmd8 initialization is done. */ zip->decompress_init = 1; /* We've already read 2 bytes in the output stream. Additionally, * Ppmd8 initialization code could read some data as well. So we * are advancing the stream by 2 bytes plus whatever number of * bytes Ppmd8 init function used. */ zip->entry_compressed_bytes_read += 2 + zip->zipx_ppmd_read_compressed; return ARCHIVE_OK; } static int zip_read_data_zipx_ppmd(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct zip* zip = (struct zip *)(a->format->data); int ret; size_t consumed_bytes = 0; ssize_t bytes_avail = 0; (void) offset; /* UNUSED */ /* If we're here for the first time, initialize Ppmd8 decompression * context first. */ if(!zip->decompress_init) { ret = zipx_ppmd8_init(a, zip); if(ret != ARCHIVE_OK) return ret; } /* Fetch for more data. We're reading 1 byte here, but libarchive * should prefetch more bytes. */ (void) __archive_read_ahead(a, 1, &bytes_avail); if(bytes_avail < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated PPMd8 file body"); return (ARCHIVE_FATAL); } /* This counter will be updated inside ppmd_read(), which at one * point will be called by Ppmd8_DecodeSymbol. */ zip->zipx_ppmd_read_compressed = 0; /* Decompression loop. */ do { int sym = __archive_ppmd8_functions.Ppmd8_DecodeSymbol( &zip->ppmd8); if(sym < 0) { zip->end_of_entry = 1; break; } /* This field is set by ppmd_read() when there was no more data * to be read. */ if(zip->ppmd8_stream_failed) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated PPMd8 file body"); return (ARCHIVE_FATAL); } zip->uncompressed_buffer[consumed_bytes] = (uint8_t) sym; ++consumed_bytes; } while(consumed_bytes < zip->uncompressed_buffer_size); /* Update pointers for libarchive. */ *buff = zip->uncompressed_buffer; *size = consumed_bytes; /* Update pointers so we can continue decompression in another call. */ zip->entry_bytes_remaining -= zip->zipx_ppmd_read_compressed; zip->entry_compressed_bytes_read += zip->zipx_ppmd_read_compressed; zip->entry_uncompressed_bytes_read += consumed_bytes; /* If we're at the end of stream, deinitialize Ppmd8 context. */ if(zip->end_of_entry) { __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); zip->ppmd8_valid = 0; } /* Seek for optional marker, same way as in each zip entry. */ ret = consume_optional_marker(a, zip); if (ret != ARCHIVE_OK) return ret; return ARCHIVE_OK; } #ifdef HAVE_BZLIB_H static int zipx_bzip2_init(struct archive_read *a, struct zip *zip) { int r; /* Deallocate already existing BZ2 decompression context if it * exists. */ if(zip->bzstream_valid) { BZ2_bzDecompressEnd(&zip->bzstream); zip->bzstream_valid = 0; } /* Allocate a new BZ2 decompression context. */ memset(&zip->bzstream, 0, sizeof(bz_stream)); r = BZ2_bzDecompressInit(&zip->bzstream, 0, 1); if(r != BZ_OK) { archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, "bzip2 initialization failed(%d)", r); return ARCHIVE_FAILED; } /* Mark the bzstream field to be released in cleanup phase. */ zip->bzstream_valid = 1; /* (Re)allocate the buffer that will contain decompressed bytes. */ free(zip->uncompressed_buffer); zip->uncompressed_buffer_size = 256 * 1024; zip->uncompressed_buffer = (uint8_t*) malloc(zip->uncompressed_buffer_size); if (zip->uncompressed_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for bzip2 decompression"); return ARCHIVE_FATAL; } /* Initialization done. */ zip->decompress_init = 1; return ARCHIVE_OK; } static int zip_read_data_zipx_bzip2(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct zip *zip = (struct zip *)(a->format->data); ssize_t bytes_avail = 0, in_bytes, to_consume; const void *compressed_buff; int r; uint64_t total_out; (void) offset; /* UNUSED */ /* Initialize decompression context if we're here for the first time. */ if(!zip->decompress_init) { r = zipx_bzip2_init(a, zip); if(r != ARCHIVE_OK) return r; } /* Fetch more compressed bytes. */ compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); if(bytes_avail < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated bzip2 file body"); return (ARCHIVE_FATAL); } in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); if(in_bytes < 1) { /* libbz2 doesn't complain when caller feeds avail_in == 0. * It will actually return success in this case, which is * undesirable. This is why we need to make this check * manually. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated bzip2 file body"); return (ARCHIVE_FATAL); } /* Setup buffer boundaries. */ zip->bzstream.next_in = (char*)(uintptr_t) compressed_buff; zip->bzstream.avail_in = in_bytes; zip->bzstream.total_in_hi32 = 0; zip->bzstream.total_in_lo32 = 0; zip->bzstream.next_out = (char*) zip->uncompressed_buffer; zip->bzstream.avail_out = zip->uncompressed_buffer_size; zip->bzstream.total_out_hi32 = 0; zip->bzstream.total_out_lo32 = 0; /* Perform the decompression. */ r = BZ2_bzDecompress(&zip->bzstream); switch(r) { case BZ_STREAM_END: /* If we're at the end of the stream, deinitialize the * decompression context now. */ switch(BZ2_bzDecompressEnd(&zip->bzstream)) { case BZ_OK: break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Failed to clean up bzip2 " "decompressor"); return ARCHIVE_FATAL; } zip->end_of_entry = 1; break; case BZ_OK: /* The decompressor has successfully decoded this * chunk of data, but more data is still in queue. */ break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "bzip2 decompression failed"); return ARCHIVE_FATAL; } /* Update the pointers so decompressor can continue decoding. */ to_consume = zip->bzstream.total_in_lo32; __archive_read_consume(a, to_consume); total_out = ((uint64_t) zip->bzstream.total_out_hi32 << 32) + zip->bzstream.total_out_lo32; zip->entry_bytes_remaining -= to_consume; zip->entry_compressed_bytes_read += to_consume; zip->entry_uncompressed_bytes_read += total_out; /* Give libarchive its due. */ *size = total_out; *buff = zip->uncompressed_buffer; /* Seek for optional marker, like in other entries. */ r = consume_optional_marker(a, zip); if(r != ARCHIVE_OK) return r; return ARCHIVE_OK; } #endif #if HAVE_ZSTD_H && HAVE_LIBZSTD static int zipx_zstd_init(struct archive_read *a, struct zip *zip) { size_t r; /* Deallocate already existing Zstd decompression context if it * exists. */ if(zip->zstdstream_valid) { ZSTD_freeDStream(zip->zstdstream); zip->zstdstream_valid = 0; } /* Allocate a new Zstd decompression context. */ zip->zstdstream = ZSTD_createDStream(); r = ZSTD_initDStream(zip->zstdstream); if (ZSTD_isError(r)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Error initializing zstd decompressor: %s", ZSTD_getErrorName(r)); return ARCHIVE_FAILED; } /* Mark the zstdstream field to be released in cleanup phase. */ zip->zstdstream_valid = 1; /* (Re)allocate the buffer that will contain decompressed bytes. */ free(zip->uncompressed_buffer); zip->uncompressed_buffer_size = ZSTD_DStreamOutSize(); zip->uncompressed_buffer = (uint8_t*) malloc(zip->uncompressed_buffer_size); if (zip->uncompressed_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for Zstd decompression"); return ARCHIVE_FATAL; } /* Initialization done. */ zip->decompress_init = 1; return ARCHIVE_OK; } static int zip_read_data_zipx_zstd(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct zip *zip = (struct zip *)(a->format->data); ssize_t bytes_avail = 0, in_bytes, to_consume; const void *compressed_buff; int r; size_t ret; uint64_t total_out; ZSTD_outBuffer out; ZSTD_inBuffer in; (void) offset; /* UNUSED */ /* Initialize decompression context if we're here for the first time. */ if(!zip->decompress_init) { r = zipx_zstd_init(a, zip); if(r != ARCHIVE_OK) return r; } /* Fetch more compressed bytes */ compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); if(bytes_avail < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated zstd file body"); return (ARCHIVE_FATAL); } in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); if(in_bytes < 1) { /* zstd doesn't complain when caller feeds avail_in == 0. * It will actually return success in this case, which is * undesirable. This is why we need to make this check * manually. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated zstd file body"); return (ARCHIVE_FATAL); } /* Setup buffer boundaries */ in.src = compressed_buff; in.size = in_bytes; in.pos = 0; out = (ZSTD_outBuffer) { zip->uncompressed_buffer, zip->uncompressed_buffer_size, 0 }; /* Perform the decompression. */ ret = ZSTD_decompressStream(zip->zstdstream, &out, &in); if (ZSTD_isError(ret)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Error during zstd decompression: %s", ZSTD_getErrorName(ret)); return (ARCHIVE_FATAL); } /* Check end of the stream. */ if (ret == 0) { if ((in.pos == in.size) && (out.pos < out.size)) { zip->end_of_entry = 1; ZSTD_freeDStream(zip->zstdstream); zip->zstdstream_valid = 0; } } /* Update the pointers so decompressor can continue decoding. */ to_consume = in.pos; __archive_read_consume(a, to_consume); total_out = out.pos; zip->entry_bytes_remaining -= to_consume; zip->entry_compressed_bytes_read += to_consume; zip->entry_uncompressed_bytes_read += total_out; /* Give libarchive its due. */ *size = total_out; *buff = zip->uncompressed_buffer; /* Seek for optional marker, like in other entries. */ r = consume_optional_marker(a, zip); if(r != ARCHIVE_OK) return r; return ARCHIVE_OK; } #endif #ifdef HAVE_ZLIB_H static int zip_deflate_init(struct archive_read *a, struct zip *zip) { int r; /* If we haven't yet read any data, initialize the decompressor. */ if (!zip->decompress_init) { if (zip->stream_valid) r = inflateReset(&zip->stream); else r = inflateInit2(&zip->stream, -15 /* Don't check for zlib header */); if (r != Z_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't initialize ZIP decompression."); return (ARCHIVE_FATAL); } /* Stream structure has been set up. */ zip->stream_valid = 1; /* We've initialized decompression for this stream. */ zip->decompress_init = 1; } return (ARCHIVE_OK); } static int zip_read_data_deflate(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct zip *zip; ssize_t bytes_avail; const void *compressed_buff, *sp; int r; (void)offset; /* UNUSED */ zip = (struct zip *)(a->format->data); /* If the buffer hasn't been allocated, allocate it now. */ if (zip->uncompressed_buffer == NULL) { zip->uncompressed_buffer_size = 256 * 1024; zip->uncompressed_buffer = (unsigned char *)malloc(zip->uncompressed_buffer_size); if (zip->uncompressed_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for ZIP decompression"); return (ARCHIVE_FATAL); } } r = zip_deflate_init(a, zip); if (r != ARCHIVE_OK) return (r); /* * Note: '1' here is a performance optimization. * Recall that the decompression layer returns a count of * available bytes; asking for more than that forces the * decompressor to combine reads by copying data. */ compressed_buff = sp = __archive_read_ahead(a, 1, &bytes_avail); if (0 == (zip->entry->zip_flags & ZIP_LENGTH_AT_END) && bytes_avail > zip->entry_bytes_remaining) { bytes_avail = (ssize_t)zip->entry_bytes_remaining; } if (bytes_avail < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file body"); return (ARCHIVE_FATAL); } if (zip->tctx_valid || zip->cctx_valid) { if (zip->decrypted_bytes_remaining < (size_t)bytes_avail) { size_t buff_remaining = (zip->decrypted_buffer + zip->decrypted_buffer_size) - (zip->decrypted_ptr + zip->decrypted_bytes_remaining); if (buff_remaining > (size_t)bytes_avail) buff_remaining = (size_t)bytes_avail; if (0 == (zip->entry->zip_flags & ZIP_LENGTH_AT_END) && zip->entry_bytes_remaining > 0) { if ((int64_t)(zip->decrypted_bytes_remaining + buff_remaining) > zip->entry_bytes_remaining) { if (zip->entry_bytes_remaining < (int64_t)zip->decrypted_bytes_remaining) buff_remaining = 0; else buff_remaining = (size_t)zip->entry_bytes_remaining - zip->decrypted_bytes_remaining; } } if (buff_remaining > 0) { if (zip->tctx_valid) { trad_enc_decrypt_update(&zip->tctx, compressed_buff, buff_remaining, zip->decrypted_ptr + zip->decrypted_bytes_remaining, buff_remaining); } else { size_t dsize = buff_remaining; archive_decrypto_aes_ctr_update( &zip->cctx, compressed_buff, buff_remaining, zip->decrypted_ptr + zip->decrypted_bytes_remaining, &dsize); } zip->decrypted_bytes_remaining += buff_remaining; } } bytes_avail = zip->decrypted_bytes_remaining; compressed_buff = (const char *)zip->decrypted_ptr; } /* * A bug in zlib.h: stream.next_in should be marked 'const' * but isn't (the library never alters data through the * next_in pointer, only reads it). The result: this ugly * cast to remove 'const'. */ zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff; zip->stream.avail_in = (uInt)bytes_avail; zip->stream.total_in = 0; zip->stream.next_out = zip->uncompressed_buffer; zip->stream.avail_out = (uInt)zip->uncompressed_buffer_size; zip->stream.total_out = 0; r = inflate(&zip->stream, 0); switch (r) { case Z_OK: break; case Z_STREAM_END: zip->end_of_entry = 1; break; case Z_MEM_ERROR: archive_set_error(&a->archive, ENOMEM, "Out of memory for ZIP decompression"); return (ARCHIVE_FATAL); default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "ZIP decompression failed (%d)", r); return (ARCHIVE_FATAL); } /* Consume as much as the compressor actually used. */ bytes_avail = zip->stream.total_in; if (zip->tctx_valid || zip->cctx_valid) { zip->decrypted_bytes_remaining -= bytes_avail; if (zip->decrypted_bytes_remaining == 0) zip->decrypted_ptr = zip->decrypted_buffer; else zip->decrypted_ptr += bytes_avail; } /* Calculate compressed data as much as we used.*/ if (zip->hctx_valid) archive_hmac_sha1_update(&zip->hctx, sp, bytes_avail); __archive_read_consume(a, bytes_avail); zip->entry_bytes_remaining -= bytes_avail; zip->entry_compressed_bytes_read += bytes_avail; *size = zip->stream.total_out; zip->entry_uncompressed_bytes_read += zip->stream.total_out; *buff = zip->uncompressed_buffer; if (zip->end_of_entry && zip->hctx_valid) { r = check_authentication_code(a, NULL); if (r != ARCHIVE_OK) return (r); } r = consume_optional_marker(a, zip); if (r != ARCHIVE_OK) return (r); return (ARCHIVE_OK); } #endif static int read_decryption_header(struct archive_read *a) { struct zip *zip = (struct zip *)(a->format->data); const char *p; unsigned int remaining_size; unsigned int ts; /* * Read an initialization vector data field. */ p = __archive_read_ahead(a, 2, NULL); if (p == NULL) goto truncated; ts = zip->iv_size; zip->iv_size = archive_le16dec(p); __archive_read_consume(a, 2); if (ts < zip->iv_size) { free(zip->iv); zip->iv = NULL; } p = __archive_read_ahead(a, zip->iv_size, NULL); if (p == NULL) goto truncated; if (zip->iv == NULL) { zip->iv = malloc(zip->iv_size); if (zip->iv == NULL) goto nomem; } memcpy(zip->iv, p, zip->iv_size); __archive_read_consume(a, zip->iv_size); /* * Read a size of remaining decryption header field. */ p = __archive_read_ahead(a, 14, NULL); if (p == NULL) goto truncated; remaining_size = archive_le32dec(p); if (remaining_size < 16 || remaining_size > (1 << 18)) goto corrupted; /* Check if format version is supported. */ if (archive_le16dec(p+4) != 3) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported encryption format version: %u", archive_le16dec(p+4)); return (ARCHIVE_FAILED); } /* * Read an encryption algorithm field. */ zip->alg_id = archive_le16dec(p+6); switch (zip->alg_id) { case 0x6601:/* DES */ case 0x6602:/* RC2 */ case 0x6603:/* 3DES 168 */ case 0x6609:/* 3DES 112 */ case 0x660E:/* AES 128 */ case 0x660F:/* AES 192 */ case 0x6610:/* AES 256 */ case 0x6702:/* RC2 (version >= 5.2) */ case 0x6720:/* Blowfish */ case 0x6721:/* Twofish */ case 0x6801:/* RC4 */ /* Supported encryption algorithm. */ break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unknown encryption algorithm: %u", zip->alg_id); return (ARCHIVE_FAILED); } /* * Read a bit length field. */ zip->bit_len = archive_le16dec(p+8); /* * Read a flags field. */ zip->flags = archive_le16dec(p+10); switch (zip->flags & 0xf000) { case 0x0001: /* Password is required to decrypt. */ case 0x0002: /* Certificates only. */ case 0x0003: /* Password or certificate required to decrypt. */ break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unknown encryption flag: %u", zip->flags); return (ARCHIVE_FAILED); } if ((zip->flags & 0xf000) == 0 || (zip->flags & 0xf000) == 0x4000) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unknown encryption flag: %u", zip->flags); return (ARCHIVE_FAILED); } /* * Read an encrypted random data field. */ ts = zip->erd_size; zip->erd_size = archive_le16dec(p+12); __archive_read_consume(a, 14); if ((zip->erd_size & 0xf) != 0 || (zip->erd_size + 16) > remaining_size || (zip->erd_size + 16) < zip->erd_size) goto corrupted; if (ts < zip->erd_size) { free(zip->erd); zip->erd = NULL; } p = __archive_read_ahead(a, zip->erd_size, NULL); if (p == NULL) goto truncated; if (zip->erd == NULL) { zip->erd = malloc(zip->erd_size); if (zip->erd == NULL) goto nomem; } memcpy(zip->erd, p, zip->erd_size); __archive_read_consume(a, zip->erd_size); /* * Read a reserved data field. */ p = __archive_read_ahead(a, 4, NULL); if (p == NULL) goto truncated; /* Reserved data size should be zero. */ if (archive_le32dec(p) != 0) goto corrupted; __archive_read_consume(a, 4); /* * Read a password validation data field. */ p = __archive_read_ahead(a, 2, NULL); if (p == NULL) goto truncated; ts = zip->v_size; zip->v_size = archive_le16dec(p); __archive_read_consume(a, 2); if ((zip->v_size & 0x0f) != 0 || (zip->erd_size + zip->v_size + 16) > remaining_size || (zip->erd_size + zip->v_size + 16) < (zip->erd_size + zip->v_size)) goto corrupted; if (ts < zip->v_size) { free(zip->v_data); zip->v_data = NULL; } p = __archive_read_ahead(a, zip->v_size, NULL); if (p == NULL) goto truncated; if (zip->v_data == NULL) { zip->v_data = malloc(zip->v_size); if (zip->v_data == NULL) goto nomem; } memcpy(zip->v_data, p, zip->v_size); __archive_read_consume(a, zip->v_size); p = __archive_read_ahead(a, 4, NULL); if (p == NULL) goto truncated; zip->v_crc32 = archive_le32dec(p); __archive_read_consume(a, 4); /*return (ARCHIVE_OK); * This is not fully implemented yet.*/ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Encrypted file is unsupported"); return (ARCHIVE_FAILED); truncated: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); corrupted: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Corrupted ZIP file data"); return (ARCHIVE_FATAL); nomem: archive_set_error(&a->archive, ENOMEM, "No memory for ZIP decryption"); return (ARCHIVE_FATAL); } static int zip_alloc_decryption_buffer(struct archive_read *a) { struct zip *zip = (struct zip *)(a->format->data); size_t bs = 256 * 1024; if (zip->decrypted_buffer == NULL) { zip->decrypted_buffer_size = bs; zip->decrypted_buffer = malloc(bs); if (zip->decrypted_buffer == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory for ZIP decryption"); return (ARCHIVE_FATAL); } } zip->decrypted_ptr = zip->decrypted_buffer; return (ARCHIVE_OK); } static int init_traditional_PKWARE_decryption(struct archive_read *a) { struct zip *zip = (struct zip *)(a->format->data); const void *p; int retry; int r; if (zip->tctx_valid) return (ARCHIVE_OK); /* Read the 12 bytes encryption header stored at the start of the data area. */ #define ENC_HEADER_SIZE 12 if (0 == (zip->entry->zip_flags & ZIP_LENGTH_AT_END) && zip->entry_bytes_remaining < ENC_HEADER_SIZE) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated Zip encrypted body: only %jd bytes available", (intmax_t)zip->entry_bytes_remaining); return (ARCHIVE_FATAL); } p = __archive_read_ahead(a, ENC_HEADER_SIZE, NULL); if (p == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } for (retry = 0;; retry++) { const char *passphrase; uint8_t crcchk; passphrase = __archive_read_next_passphrase(a); if (passphrase == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, (retry > 0)? "Incorrect passphrase": "Passphrase required for this entry"); return (ARCHIVE_FAILED); } /* * Initialize ctx for Traditional PKWARE Decryption. */ r = trad_enc_init(&zip->tctx, passphrase, strlen(passphrase), p, ENC_HEADER_SIZE, &crcchk); if (r == 0 && crcchk == zip->entry->decdat) break;/* The passphrase is OK. */ if (retry > 10000) { /* Avoid infinity loop. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Too many incorrect passphrases"); return (ARCHIVE_FAILED); } } __archive_read_consume(a, ENC_HEADER_SIZE); zip->tctx_valid = 1; if (0 == (zip->entry->zip_flags & ZIP_LENGTH_AT_END)) { zip->entry_bytes_remaining -= ENC_HEADER_SIZE; } /*zip->entry_uncompressed_bytes_read += ENC_HEADER_SIZE;*/ zip->entry_compressed_bytes_read += ENC_HEADER_SIZE; zip->decrypted_bytes_remaining = 0; return (zip_alloc_decryption_buffer(a)); #undef ENC_HEADER_SIZE } static int init_WinZip_AES_decryption(struct archive_read *a) { struct zip *zip = (struct zip *)(a->format->data); const void *p; const uint8_t *pv; size_t key_len, salt_len; uint8_t derived_key[MAX_DERIVED_KEY_BUF_SIZE]; int retry; int r; if (zip->cctx_valid || zip->hctx_valid) return (ARCHIVE_OK); switch (zip->entry->aes_extra.strength) { case 1: salt_len = 8; key_len = 16; break; case 2: salt_len = 12; key_len = 24; break; case 3: salt_len = 16; key_len = 32; break; default: goto corrupted; } p = __archive_read_ahead(a, salt_len + 2, NULL); if (p == NULL) goto truncated; for (retry = 0;; retry++) { const char *passphrase; passphrase = __archive_read_next_passphrase(a); if (passphrase == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, (retry > 0)? "Incorrect passphrase": "Passphrase required for this entry"); return (ARCHIVE_FAILED); } memset(derived_key, 0, sizeof(derived_key)); r = archive_pbkdf2_sha1(passphrase, strlen(passphrase), p, salt_len, 1000, derived_key, key_len * 2 + 2); if (r != 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Decryption is unsupported due to lack of " "crypto library"); return (ARCHIVE_FAILED); } /* Check password verification value. */ pv = ((const uint8_t *)p) + salt_len; if (derived_key[key_len * 2] == pv[0] && derived_key[key_len * 2 + 1] == pv[1]) break;/* The passphrase is OK. */ if (retry > 10000) { /* Avoid infinity loop. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Too many incorrect passphrases"); return (ARCHIVE_FAILED); } } r = archive_decrypto_aes_ctr_init(&zip->cctx, derived_key, key_len); if (r != 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Decryption is unsupported due to lack of crypto library"); return (ARCHIVE_FAILED); } r = archive_hmac_sha1_init(&zip->hctx, derived_key + key_len, key_len); if (r != 0) { archive_decrypto_aes_ctr_release(&zip->cctx); archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Failed to initialize HMAC-SHA1"); return (ARCHIVE_FAILED); } zip->cctx_valid = zip->hctx_valid = 1; __archive_read_consume(a, salt_len + 2); zip->entry_bytes_remaining -= salt_len + 2 + AUTH_CODE_SIZE; if (0 == (zip->entry->zip_flags & ZIP_LENGTH_AT_END) && zip->entry_bytes_remaining < 0) goto corrupted; zip->entry_compressed_bytes_read += salt_len + 2 + AUTH_CODE_SIZE; zip->decrypted_bytes_remaining = 0; zip->entry->compression = zip->entry->aes_extra.compression; return (zip_alloc_decryption_buffer(a)); truncated: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); corrupted: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Corrupted ZIP file data"); return (ARCHIVE_FATAL); } static int archive_read_format_zip_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { int r; struct zip *zip = (struct zip *)(a->format->data); if (zip->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) { zip->has_encrypted_entries = 0; } *offset = zip->entry_uncompressed_bytes_read; *size = 0; *buff = NULL; /* If we hit end-of-entry last time, return ARCHIVE_EOF. */ if (zip->end_of_entry) return (ARCHIVE_EOF); /* Return EOF immediately if this is a non-regular file. */ if (AE_IFREG != (zip->entry->mode & AE_IFMT)) return (ARCHIVE_EOF); __archive_read_consume(a, zip->unconsumed); zip->unconsumed = 0; if (zip->init_decryption) { zip->has_encrypted_entries = 1; if (zip->entry->zip_flags & ZIP_STRONG_ENCRYPTED) r = read_decryption_header(a); else if (zip->entry->compression == WINZIP_AES_ENCRYPTION) r = init_WinZip_AES_decryption(a); else r = init_traditional_PKWARE_decryption(a); if (r != ARCHIVE_OK) return (r); zip->init_decryption = 0; } switch(zip->entry->compression) { case 0: /* No compression. */ r = zip_read_data_none(a, buff, size, offset); break; #ifdef HAVE_BZLIB_H case 12: /* ZIPx bzip2 compression. */ r = zip_read_data_zipx_bzip2(a, buff, size, offset); break; #endif #if HAVE_LZMA_H && HAVE_LIBLZMA case 14: /* ZIPx LZMA compression. */ r = zip_read_data_zipx_lzma_alone(a, buff, size, offset); break; case 95: /* ZIPx XZ compression. */ r = zip_read_data_zipx_xz(a, buff, size, offset); break; #endif #if HAVE_ZSTD_H && HAVE_LIBZSTD case 93: /* ZIPx Zstd compression. */ r = zip_read_data_zipx_zstd(a, buff, size, offset); break; #endif /* PPMd support is built-in, so we don't need any #if guards. */ case 98: /* ZIPx PPMd compression. */ r = zip_read_data_zipx_ppmd(a, buff, size, offset); break; #ifdef HAVE_ZLIB_H case 8: /* Deflate compression. */ r = zip_read_data_deflate(a, buff, size, offset); break; #endif default: /* Unsupported compression. */ /* Return a warning. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported ZIP compression method (%d: %s)", zip->entry->compression, compression_name(zip->entry->compression)); /* We can't decompress this entry, but we will * be able to skip() it and try the next entry. */ return (ARCHIVE_FAILED); break; } if (r != ARCHIVE_OK) return (r); /* Update checksum */ if (*size) zip->entry_crc32 = zip->crc32func(zip->entry_crc32, *buff, (unsigned)*size); /* If we hit the end, swallow any end-of-data marker. */ if (zip->end_of_entry) { /* Check file size, CRC against these values. */ if (zip->entry->compressed_size != zip->entry_compressed_bytes_read) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "ZIP compressed data is wrong size " "(read %jd, expected %jd)", (intmax_t)zip->entry_compressed_bytes_read, (intmax_t)zip->entry->compressed_size); return (ARCHIVE_WARN); } /* Size field only stores the lower 32 bits of the actual * size. */ if ((zip->entry->uncompressed_size & UINT32_MAX) != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "ZIP uncompressed data is wrong size " "(read %jd, expected %jd)\n", (intmax_t)zip->entry_uncompressed_bytes_read, (intmax_t)zip->entry->uncompressed_size); return (ARCHIVE_WARN); } /* Check computed CRC against header */ if ((!zip->hctx_valid || zip->entry->aes_extra.vendor != AES_VENDOR_AE_2) && zip->entry->crc32 != zip->entry_crc32 && !zip->ignore_crc32) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "ZIP bad CRC: 0x%lx should be 0x%lx", (unsigned long)zip->entry_crc32, (unsigned long)zip->entry->crc32); return (ARCHIVE_WARN); } } return (ARCHIVE_OK); } static int archive_read_format_zip_cleanup(struct archive_read *a) { struct zip *zip; struct zip_entry *zip_entry, *next_zip_entry; zip = (struct zip *)(a->format->data); #ifdef HAVE_ZLIB_H if (zip->stream_valid) inflateEnd(&zip->stream); #endif #if HAVE_LZMA_H && HAVE_LIBLZMA if (zip->zipx_lzma_valid) { lzma_end(&zip->zipx_lzma_stream); } #endif #ifdef HAVE_BZLIB_H if (zip->bzstream_valid) { BZ2_bzDecompressEnd(&zip->bzstream); } #endif #if HAVE_ZSTD_H && HAVE_LIBZSTD if (zip->zstdstream_valid) { ZSTD_freeDStream(zip->zstdstream); } #endif free(zip->uncompressed_buffer); if (zip->ppmd8_valid) __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); if (zip->zip_entries) { zip_entry = zip->zip_entries; while (zip_entry != NULL) { next_zip_entry = zip_entry->next; archive_string_free(&zip_entry->rsrcname); free(zip_entry); zip_entry = next_zip_entry; } } free(zip->decrypted_buffer); if (zip->cctx_valid) archive_decrypto_aes_ctr_release(&zip->cctx); if (zip->hctx_valid) archive_hmac_sha1_cleanup(&zip->hctx); free(zip->iv); free(zip->erd); free(zip->v_data); archive_string_free(&zip->format_name); free(zip); (a->format->data) = NULL; return (ARCHIVE_OK); } static int archive_read_format_zip_has_encrypted_entries(struct archive_read *_a) { if (_a && _a->format) { struct zip * zip = (struct zip *)_a->format->data; if (zip) { return zip->has_encrypted_entries; } } return ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; } static int archive_read_format_zip_options(struct archive_read *a, const char *key, const char *val) { struct zip *zip; int ret = ARCHIVE_FAILED; zip = (struct zip *)(a->format->data); if (strcmp(key, "compat-2x") == 0) { /* Handle filenames as libarchive 2.x */ zip->init_default_conversion = (val != NULL) ? 1 : 0; return (ARCHIVE_OK); } else if (strcmp(key, "hdrcharset") == 0) { if (val == NULL || val[0] == 0) archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "zip: hdrcharset option needs a character-set name" ); else { zip->sconv = archive_string_conversion_from_charset( &a->archive, val, 0); if (zip->sconv != NULL) { if (strcmp(val, "UTF-8") == 0) zip->sconv_utf8 = zip->sconv; ret = ARCHIVE_OK; } else ret = ARCHIVE_FATAL; } return (ret); } else if (strcmp(key, "ignorecrc32") == 0) { /* Mostly useful for testing. */ if (val == NULL || val[0] == 0) { zip->crc32func = real_crc32; zip->ignore_crc32 = 0; } else { zip->crc32func = fake_crc32; zip->ignore_crc32 = 1; } return (ARCHIVE_OK); } else if (strcmp(key, "mac-ext") == 0) { zip->process_mac_extensions = (val != NULL && val[0] != 0); return (ARCHIVE_OK); } /* Note: The "warn" return is just to inform the options * supervisor that we didn't handle it. It will generate * a suitable error if no one used this option. */ return (ARCHIVE_WARN); } int archive_read_support_format_zip(struct archive *a) { int r; r = archive_read_support_format_zip_streamable(a); if (r != ARCHIVE_OK) return r; return (archive_read_support_format_zip_seekable(a)); } /* ------------------------------------------------------------------------ */ /* * Streaming-mode support */ static int archive_read_support_format_zip_capabilities_streamable(struct archive_read * a) { (void)a; /* UNUSED */ return (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA); } static int archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid) { const char *p; (void)best_bid; /* UNUSED */ if ((p = __archive_read_ahead(a, 4, NULL)) == NULL) return (-1); /* * Bid of 29 here comes from: * + 16 bits for "PK", * + next 16-bit field has 6 options so contributes * about 16 - log_2(6) ~= 16 - 2.6 ~= 13 bits * * So we've effectively verified ~29 total bits of check data. */ if (p[0] == 'P' && p[1] == 'K') { if ((p[2] == '\001' && p[3] == '\002') || (p[2] == '\003' && p[3] == '\004') || (p[2] == '\005' && p[3] == '\006') || (p[2] == '\006' && p[3] == '\006') || (p[2] == '\007' && p[3] == '\010') || (p[2] == '0' && p[3] == '0')) return (29); } /* TODO: It's worth looking ahead a little bit for a valid * PK signature. In particular, that would make it possible * to read some UUEncoded SFX files or SFX files coming from * a network socket. */ return (0); } static int archive_read_format_zip_streamable_read_header(struct archive_read *a, struct archive_entry *entry) { struct zip *zip; a->archive.archive_format = ARCHIVE_FORMAT_ZIP; if (a->archive.archive_format_name == NULL) a->archive.archive_format_name = "ZIP"; zip = (struct zip *)(a->format->data); /* * It should be sufficient to call archive_read_next_header() for * a reader to determine if an entry is encrypted or not. If the * encryption of an entry is only detectable when calling * archive_read_data(), so be it. We'll do the same check there * as well. */ if (zip->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) zip->has_encrypted_entries = 0; /* Make sure we have a zip_entry structure to use. */ if (zip->zip_entries == NULL) { zip->zip_entries = malloc(sizeof(struct zip_entry)); if (zip->zip_entries == NULL) { archive_set_error(&a->archive, ENOMEM, "Out of memory"); return ARCHIVE_FATAL; } } zip->entry = zip->zip_entries; memset(zip->entry, 0, sizeof(struct zip_entry)); if (zip->cctx_valid) archive_decrypto_aes_ctr_release(&zip->cctx); if (zip->hctx_valid) archive_hmac_sha1_cleanup(&zip->hctx); zip->tctx_valid = zip->cctx_valid = zip->hctx_valid = 0; __archive_read_reset_passphrase(a); /* Search ahead for the next local file header. */ __archive_read_consume(a, zip->unconsumed); zip->unconsumed = 0; for (;;) { int64_t skipped = 0; const char *p, *end; ssize_t bytes; p = __archive_read_ahead(a, 4, &bytes); if (p == NULL) return (ARCHIVE_FATAL); end = p + bytes; while (p + 4 <= end) { if (p[0] == 'P' && p[1] == 'K') { if (p[2] == '\003' && p[3] == '\004') { /* Regular file entry. */ __archive_read_consume(a, skipped); return zip_read_local_file_header(a, entry, zip); } /* * TODO: We cannot restore permissions * based only on the local file headers. * Consider scanning the central * directory and returning additional * entries for at least directories. * This would allow us to properly set * directory permissions. * * This won't help us fix symlinks * and may not help with regular file * permissions, either. */ if (p[2] == '\001' && p[3] == '\002') { return (ARCHIVE_EOF); } /* End of central directory? Must be an * empty archive. */ if ((p[2] == '\005' && p[3] == '\006') || (p[2] == '\006' && p[3] == '\006')) return (ARCHIVE_EOF); } ++p; ++skipped; } __archive_read_consume(a, skipped); } } static int archive_read_format_zip_read_data_skip_streamable(struct archive_read *a) { struct zip *zip; int64_t bytes_skipped; zip = (struct zip *)(a->format->data); bytes_skipped = __archive_read_consume(a, zip->unconsumed); zip->unconsumed = 0; if (bytes_skipped < 0) return (ARCHIVE_FATAL); /* If we've already read to end of data, we're done. */ if (zip->end_of_entry) return (ARCHIVE_OK); /* So we know we're streaming... */ if (0 == (zip->entry->zip_flags & ZIP_LENGTH_AT_END) || zip->entry->compressed_size > 0) { /* We know the compressed length, so we can just skip. */ bytes_skipped = __archive_read_consume(a, zip->entry_bytes_remaining); if (bytes_skipped < 0) return (ARCHIVE_FATAL); return (ARCHIVE_OK); } if (zip->init_decryption) { int r; zip->has_encrypted_entries = 1; if (zip->entry->zip_flags & ZIP_STRONG_ENCRYPTED) r = read_decryption_header(a); else if (zip->entry->compression == WINZIP_AES_ENCRYPTION) r = init_WinZip_AES_decryption(a); else r = init_traditional_PKWARE_decryption(a); if (r != ARCHIVE_OK) return (r); zip->init_decryption = 0; } /* We're streaming and we don't know the length. */ /* If the body is compressed and we know the format, we can * find an exact end-of-entry by decompressing it. */ switch (zip->entry->compression) { #ifdef HAVE_ZLIB_H case 8: /* Deflate compression. */ while (!zip->end_of_entry) { int64_t offset = 0; const void *buff = NULL; size_t size = 0; int r; r = zip_read_data_deflate(a, &buff, &size, &offset); if (r != ARCHIVE_OK) return (r); } return ARCHIVE_OK; #endif default: /* Uncompressed or unknown. */ /* Scan for a PK\007\010 signature. */ for (;;) { const char *p, *buff; ssize_t bytes_avail; buff = __archive_read_ahead(a, 16, &bytes_avail); if (bytes_avail < 16) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } p = buff; while (p <= buff + bytes_avail - 16) { if (p[3] == 'P') { p += 3; } else if (p[3] == 'K') { p += 2; } else if (p[3] == '\007') { p += 1; } else if (p[3] == '\010' && p[2] == '\007' && p[1] == 'K' && p[0] == 'P') { if (zip->entry->flags & LA_USED_ZIP64) __archive_read_consume(a, p - buff + 24); else __archive_read_consume(a, p - buff + 16); return ARCHIVE_OK; } else { p += 4; } } __archive_read_consume(a, p - buff); } } } int archive_read_support_format_zip_streamable(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct zip *zip; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_zip"); zip = (struct zip *)calloc(1, sizeof(*zip)); if (zip == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate zip data"); return (ARCHIVE_FATAL); } /* Streamable reader doesn't support mac extensions. */ zip->process_mac_extensions = 0; /* * Until enough data has been read, we cannot tell about * any encrypted entries yet. */ zip->has_encrypted_entries = ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; zip->crc32func = real_crc32; r = __archive_read_register_format(a, zip, "zip", archive_read_format_zip_streamable_bid, archive_read_format_zip_options, archive_read_format_zip_streamable_read_header, archive_read_format_zip_read_data, archive_read_format_zip_read_data_skip_streamable, NULL, archive_read_format_zip_cleanup, archive_read_support_format_zip_capabilities_streamable, archive_read_format_zip_has_encrypted_entries); if (r != ARCHIVE_OK) free(zip); return (ARCHIVE_OK); } /* ------------------------------------------------------------------------ */ /* * Seeking-mode support */ static int archive_read_support_format_zip_capabilities_seekable(struct archive_read * a) { (void)a; /* UNUSED */ return (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA); } /* * TODO: This is a performance sink because it forces the read core to * drop buffered data from the start of file, which will then have to * be re-read again if this bidder loses. * * We workaround this a little by passing in the best bid so far so * that later bidders can do nothing if they know they'll never * outbid. But we can certainly do better... */ static int read_eocd(struct zip *zip, const char *p, int64_t current_offset) { uint16_t disk_num; uint32_t cd_size, cd_offset; disk_num = archive_le16dec(p + 4); cd_size = archive_le32dec(p + 12); cd_offset = archive_le32dec(p + 16); /* Sanity-check the EOCD we've found. */ /* This must be the first volume. */ if (disk_num != 0) return 0; /* Central directory must be on this volume. */ if (disk_num != archive_le16dec(p + 6)) return 0; /* All central directory entries must be on this volume. */ if (archive_le16dec(p + 10) != archive_le16dec(p + 8)) return 0; /* Central directory can't extend beyond start of EOCD record. */ if (cd_offset + cd_size > current_offset) return 0; /* Save the central directory location for later use. */ zip->central_directory_offset = cd_offset; zip->central_directory_offset_adjusted = current_offset - cd_size; /* This is just a tiny bit higher than the maximum returned by the streaming Zip bidder. This ensures that the more accurate seeking Zip parser wins whenever seek is available. */ return 32; } /* * Examine Zip64 EOCD locator: If it's valid, store the information * from it. */ static int read_zip64_eocd(struct archive_read *a, struct zip *zip, const char *p) { int64_t eocd64_offset; int64_t eocd64_size; /* Sanity-check the locator record. */ /* Central dir must be on first volume. */ if (archive_le32dec(p + 4) != 0) return 0; /* Must be only a single volume. */ if (archive_le32dec(p + 16) != 1) return 0; /* Find the Zip64 EOCD record. */ eocd64_offset = archive_le64dec(p + 8); if (__archive_read_seek(a, eocd64_offset, SEEK_SET) < 0) return 0; if ((p = __archive_read_ahead(a, 56, NULL)) == NULL) return 0; /* Make sure we can read all of it. */ eocd64_size = archive_le64dec(p + 4) + 12; if (eocd64_size < 56 || eocd64_size > 16384) return 0; if ((p = __archive_read_ahead(a, (size_t)eocd64_size, NULL)) == NULL) return 0; /* Sanity-check the EOCD64 */ if (archive_le32dec(p + 16) != 0) /* Must be disk #0 */ return 0; if (archive_le32dec(p + 20) != 0) /* CD must be on disk #0 */ return 0; /* CD can't be split. */ if (archive_le64dec(p + 24) != archive_le64dec(p + 32)) return 0; /* Save the central directory offset for later use. */ zip->central_directory_offset = archive_le64dec(p + 48); /* TODO: Needs scanning backwards to find the eocd64 instead of assuming */ zip->central_directory_offset_adjusted = zip->central_directory_offset; return 32; } static int archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) { struct zip *zip = (struct zip *)a->format->data; int64_t file_size, current_offset; const char *p; int i, tail; /* If someone has already bid more than 32, then avoid trashing the look-ahead buffers with a seek. */ if (best_bid > 32) return (-1); file_size = __archive_read_seek(a, 0, SEEK_END); if (file_size <= 0) return 0; /* Search last 16k of file for end-of-central-directory * record (which starts with PK\005\006) */ tail = (int)zipmin(1024 * 16, file_size); current_offset = __archive_read_seek(a, -tail, SEEK_END); if (current_offset < 0) return 0; if ((p = __archive_read_ahead(a, (size_t)tail, NULL)) == NULL) return 0; /* Boyer-Moore search backwards from the end, since we want * to match the last EOCD in the file (there can be more than * one if there is an uncompressed Zip archive as a member * within this Zip archive). */ for (i = tail - 22; i > 0;) { switch (p[i]) { case 'P': if (memcmp(p + i, "PK\005\006", 4) == 0) { int ret = read_eocd(zip, p + i, current_offset + i); /* Zip64 EOCD locator precedes * regular EOCD if present. */ if (i >= 20 && memcmp(p + i - 20, "PK\006\007", 4) == 0) { int ret_zip64 = read_zip64_eocd(a, zip, p + i - 20); if (ret_zip64 > ret) ret = ret_zip64; } return (ret); } i -= 4; break; case 'K': i -= 1; break; case 005: i -= 2; break; case 006: i -= 3; break; default: i -= 4; break; } } return 0; } /* The red-black trees are only used in seeking mode to manage * the in-memory copy of the central directory. */ static int cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2) { const struct zip_entry *e1 = (const struct zip_entry *)n1; const struct zip_entry *e2 = (const struct zip_entry *)n2; if (e1->local_header_offset > e2->local_header_offset) return -1; if (e1->local_header_offset < e2->local_header_offset) return 1; return 0; } static int cmp_key(const struct archive_rb_node *n, const void *key) { /* This function won't be called */ (void)n; /* UNUSED */ (void)key; /* UNUSED */ return 1; } static const struct archive_rb_tree_ops rb_ops = { &cmp_node, &cmp_key }; static int rsrc_cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2) { const struct zip_entry *e1 = (const struct zip_entry *)n1; const struct zip_entry *e2 = (const struct zip_entry *)n2; return (strcmp(e2->rsrcname.s, e1->rsrcname.s)); } static int rsrc_cmp_key(const struct archive_rb_node *n, const void *key) { const struct zip_entry *e = (const struct zip_entry *)n; return (strcmp((const char *)key, e->rsrcname.s)); } static const struct archive_rb_tree_ops rb_rsrc_ops = { &rsrc_cmp_node, &rsrc_cmp_key }; static const char * rsrc_basename(const char *name, size_t name_length) { const char *s, *r; r = s = name; for (;;) { s = memchr(s, '/', name_length - (s - name)); if (s == NULL) break; r = ++s; } return (r); } static void expose_parent_dirs(struct zip *zip, const char *name, size_t name_length) { struct archive_string str; struct zip_entry *dir; char *s; archive_string_init(&str); archive_strncpy(&str, name, name_length); for (;;) { s = strrchr(str.s, '/'); if (s == NULL) break; *s = '\0'; /* Transfer the parent directory from zip->tree_rsrc RB * tree to zip->tree RB tree to expose. */ dir = (struct zip_entry *) __archive_rb_tree_find_node(&zip->tree_rsrc, str.s); if (dir == NULL) break; __archive_rb_tree_remove_node(&zip->tree_rsrc, &dir->node); archive_string_free(&dir->rsrcname); __archive_rb_tree_insert_node(&zip->tree, &dir->node); } archive_string_free(&str); } static int slurp_central_directory(struct archive_read *a, struct archive_entry* entry, struct zip *zip) { ssize_t i; unsigned found; int64_t correction; ssize_t bytes_avail; const char *p; /* * Find the start of the central directory. The end-of-CD * record has our starting point, but there are lots of * Zip archives which have had other data prepended to the * file, which makes the recorded offsets all too small. * So we search forward from the specified offset until we * find the real start of the central directory. Then we * know the correction we need to apply to account for leading * padding. */ if (__archive_read_seek(a, zip->central_directory_offset_adjusted, SEEK_SET) < 0) return ARCHIVE_FATAL; found = 0; while (!found) { if ((p = __archive_read_ahead(a, 20, &bytes_avail)) == NULL) return ARCHIVE_FATAL; for (found = 0, i = 0; !found && i < bytes_avail - 4;) { switch (p[i + 3]) { case 'P': i += 3; break; case 'K': i += 2; break; case 001: i += 1; break; case 002: if (memcmp(p + i, "PK\001\002", 4) == 0) { p += i; found = 1; } else i += 4; break; case 005: i += 1; break; case 006: if (memcmp(p + i, "PK\005\006", 4) == 0) { p += i; found = 1; } else if (memcmp(p + i, "PK\006\006", 4) == 0) { p += i; found = 1; } else i += 1; break; default: i += 4; break; } } __archive_read_consume(a, i); } correction = archive_filter_bytes(&a->archive, 0) - zip->central_directory_offset; __archive_rb_tree_init(&zip->tree, &rb_ops); __archive_rb_tree_init(&zip->tree_rsrc, &rb_rsrc_ops); zip->central_directory_entries_total = 0; while (1) { struct zip_entry *zip_entry; size_t filename_length, extra_length, comment_length; uint32_t external_attributes; const char *name, *r; if ((p = __archive_read_ahead(a, 4, NULL)) == NULL) return ARCHIVE_FATAL; if (memcmp(p, "PK\006\006", 4) == 0 || memcmp(p, "PK\005\006", 4) == 0) { break; } else if (memcmp(p, "PK\001\002", 4) != 0) { archive_set_error(&a->archive, -1, "Invalid central directory signature"); return ARCHIVE_FATAL; } if ((p = __archive_read_ahead(a, 46, NULL)) == NULL) return ARCHIVE_FATAL; zip_entry = calloc(1, sizeof(struct zip_entry)); if (zip_entry == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate zip entry"); return ARCHIVE_FATAL; } zip_entry->next = zip->zip_entries; zip_entry->flags |= LA_FROM_CENTRAL_DIRECTORY; zip->zip_entries = zip_entry; zip->central_directory_entries_total++; /* version = p[4]; */ zip_entry->system = p[5]; /* version_required = archive_le16dec(p + 6); */ zip_entry->zip_flags = archive_le16dec(p + 8); if (zip_entry->zip_flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)){ zip->has_encrypted_entries = 1; } zip_entry->compression = (char)archive_le16dec(p + 10); zip_entry->mtime = zip_time(p + 12); zip_entry->crc32 = archive_le32dec(p + 16); if (zip_entry->zip_flags & ZIP_LENGTH_AT_END) zip_entry->decdat = p[13]; else zip_entry->decdat = p[19]; zip_entry->compressed_size = archive_le32dec(p + 20); zip_entry->uncompressed_size = archive_le32dec(p + 24); filename_length = archive_le16dec(p + 28); extra_length = archive_le16dec(p + 30); comment_length = archive_le16dec(p + 32); /* disk_start = archive_le16dec(p + 34); * Better be zero. * internal_attributes = archive_le16dec(p + 36); * text bit */ external_attributes = archive_le32dec(p + 38); zip_entry->local_header_offset = archive_le32dec(p + 42) + correction; /* If we can't guess the mode, leave it zero here; when we read the local file header we might get more information. */ if (zip_entry->system == 3) { zip_entry->mode = external_attributes >> 16; } else if (zip_entry->system == 0) { // Interpret MSDOS directory bit if (0x10 == (external_attributes & 0x10)) { zip_entry->mode = AE_IFDIR | 0775; } else { zip_entry->mode = AE_IFREG | 0664; } if (0x01 == (external_attributes & 0x01)) { // Read-only bit; strip write permissions zip_entry->mode &= 0555; } } else { zip_entry->mode = 0; } /* We're done with the regular data; get the filename and * extra data. */ __archive_read_consume(a, 46); p = __archive_read_ahead(a, filename_length + extra_length, NULL); if (p == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return ARCHIVE_FATAL; } if (ARCHIVE_OK != process_extra(a, entry, p + filename_length, extra_length, zip_entry)) { return ARCHIVE_FATAL; } /* * Mac resource fork files are stored under the * "__MACOSX/" directory, so we should check if * it is. */ if (!zip->process_mac_extensions) { /* Treat every entry as a regular entry. */ __archive_rb_tree_insert_node(&zip->tree, &zip_entry->node); } else { name = p; r = rsrc_basename(name, filename_length); if (filename_length >= 9 && strncmp("__MACOSX/", name, 9) == 0) { /* If this file is not a resource fork nor * a directory. We should treat it as a non * resource fork file to expose it. */ if (name[filename_length-1] != '/' && (r - name < 3 || r[0] != '.' || r[1] != '_')) { __archive_rb_tree_insert_node( &zip->tree, &zip_entry->node); /* Expose its parent directories. */ expose_parent_dirs(zip, name, filename_length); } else { /* This file is a resource fork file or * a directory. */ archive_strncpy(&(zip_entry->rsrcname), name, filename_length); __archive_rb_tree_insert_node( &zip->tree_rsrc, &zip_entry->node); } } else { /* Generate resource fork name to find its * resource file at zip->tree_rsrc. */ archive_strcpy(&(zip_entry->rsrcname), "__MACOSX/"); archive_strncat(&(zip_entry->rsrcname), name, r - name); archive_strcat(&(zip_entry->rsrcname), "._"); archive_strncat(&(zip_entry->rsrcname), name + (r - name), filename_length - (r - name)); /* Register an entry to RB tree to sort it by * file offset. */ __archive_rb_tree_insert_node(&zip->tree, &zip_entry->node); } } /* Skip the comment too ... */ __archive_read_consume(a, filename_length + extra_length + comment_length); } return ARCHIVE_OK; } static ssize_t zip_get_local_file_header_size(struct archive_read *a, size_t extra) { const char *p; ssize_t filename_length, extra_length; if ((p = __archive_read_ahead(a, extra + 30, NULL)) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_WARN); } p += extra; if (memcmp(p, "PK\003\004", 4) != 0) { archive_set_error(&a->archive, -1, "Damaged Zip archive"); return ARCHIVE_WARN; } filename_length = archive_le16dec(p + 26); extra_length = archive_le16dec(p + 28); return (30 + filename_length + extra_length); } static int zip_read_mac_metadata(struct archive_read *a, struct archive_entry *entry, struct zip_entry *rsrc) { struct zip *zip = (struct zip *)a->format->data; unsigned char *metadata, *mp; int64_t offset = archive_filter_bytes(&a->archive, 0); size_t remaining_bytes, metadata_bytes; ssize_t hsize; int ret = ARCHIVE_OK, eof; switch(rsrc->compression) { case 0: /* No compression. */ if (rsrc->uncompressed_size != rsrc->compressed_size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Malformed OS X metadata entry: " "inconsistent size"); return (ARCHIVE_FATAL); } #ifdef HAVE_ZLIB_H case 8: /* Deflate compression. */ #endif break; default: /* Unsupported compression. */ /* Return a warning. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported ZIP compression method (%s)", compression_name(rsrc->compression)); /* We can't decompress this entry, but we will * be able to skip() it and try the next entry. */ return (ARCHIVE_WARN); } if (rsrc->uncompressed_size > (4 * 1024 * 1024)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Mac metadata is too large: %jd > 4M bytes", (intmax_t)rsrc->uncompressed_size); return (ARCHIVE_WARN); } if (rsrc->compressed_size > (4 * 1024 * 1024)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Mac metadata is too large: %jd > 4M bytes", (intmax_t)rsrc->compressed_size); return (ARCHIVE_WARN); } metadata = malloc((size_t)rsrc->uncompressed_size); if (metadata == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Mac metadata"); return (ARCHIVE_FATAL); } if (offset < rsrc->local_header_offset) __archive_read_consume(a, rsrc->local_header_offset - offset); else if (offset != rsrc->local_header_offset) { __archive_read_seek(a, rsrc->local_header_offset, SEEK_SET); } hsize = zip_get_local_file_header_size(a, 0); __archive_read_consume(a, hsize); remaining_bytes = (size_t)rsrc->compressed_size; metadata_bytes = (size_t)rsrc->uncompressed_size; mp = metadata; eof = 0; while (!eof && remaining_bytes) { const unsigned char *p; ssize_t bytes_avail; size_t bytes_used; p = __archive_read_ahead(a, 1, &bytes_avail); if (p == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); ret = ARCHIVE_WARN; goto exit_mac_metadata; } if ((size_t)bytes_avail > remaining_bytes) bytes_avail = remaining_bytes; switch(rsrc->compression) { case 0: /* No compression. */ if ((size_t)bytes_avail > metadata_bytes) bytes_avail = metadata_bytes; memcpy(mp, p, bytes_avail); bytes_used = (size_t)bytes_avail; metadata_bytes -= bytes_used; mp += bytes_used; if (metadata_bytes == 0) eof = 1; break; #ifdef HAVE_ZLIB_H case 8: /* Deflate compression. */ { int r; ret = zip_deflate_init(a, zip); if (ret != ARCHIVE_OK) goto exit_mac_metadata; zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)p; zip->stream.avail_in = (uInt)bytes_avail; zip->stream.total_in = 0; zip->stream.next_out = mp; zip->stream.avail_out = (uInt)metadata_bytes; zip->stream.total_out = 0; r = inflate(&zip->stream, 0); switch (r) { case Z_OK: break; case Z_STREAM_END: eof = 1; break; case Z_MEM_ERROR: archive_set_error(&a->archive, ENOMEM, "Out of memory for ZIP decompression"); ret = ARCHIVE_FATAL; goto exit_mac_metadata; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "ZIP decompression failed (%d)", r); ret = ARCHIVE_FATAL; goto exit_mac_metadata; } bytes_used = zip->stream.total_in; metadata_bytes -= zip->stream.total_out; mp += zip->stream.total_out; break; } #endif default: bytes_used = 0; break; } __archive_read_consume(a, bytes_used); remaining_bytes -= bytes_used; } archive_entry_copy_mac_metadata(entry, metadata, (size_t)rsrc->uncompressed_size - metadata_bytes); exit_mac_metadata: __archive_read_seek(a, offset, SEEK_SET); zip->decompress_init = 0; free(metadata); return (ret); } static int archive_read_format_zip_seekable_read_header(struct archive_read *a, struct archive_entry *entry) { struct zip *zip = (struct zip *)a->format->data; struct zip_entry *rsrc; int64_t offset; int r, ret = ARCHIVE_OK; /* * It should be sufficient to call archive_read_next_header() for * a reader to determine if an entry is encrypted or not. If the * encryption of an entry is only detectable when calling * archive_read_data(), so be it. We'll do the same check there * as well. */ if (zip->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) zip->has_encrypted_entries = 0; a->archive.archive_format = ARCHIVE_FORMAT_ZIP; if (a->archive.archive_format_name == NULL) a->archive.archive_format_name = "ZIP"; if (zip->zip_entries == NULL) { r = slurp_central_directory(a, entry, zip); if (r != ARCHIVE_OK) return r; /* Get first entry whose local header offset is lower than * other entries in the archive file. */ zip->entry = (struct zip_entry *)ARCHIVE_RB_TREE_MIN(&zip->tree); } else if (zip->entry != NULL) { /* Get next entry in local header offset order. */ zip->entry = (struct zip_entry *)__archive_rb_tree_iterate( &zip->tree, &zip->entry->node, ARCHIVE_RB_DIR_RIGHT); } if (zip->entry == NULL) return ARCHIVE_EOF; if (zip->entry->rsrcname.s) rsrc = (struct zip_entry *)__archive_rb_tree_find_node( &zip->tree_rsrc, zip->entry->rsrcname.s); else rsrc = NULL; if (zip->cctx_valid) archive_decrypto_aes_ctr_release(&zip->cctx); if (zip->hctx_valid) archive_hmac_sha1_cleanup(&zip->hctx); zip->tctx_valid = zip->cctx_valid = zip->hctx_valid = 0; __archive_read_reset_passphrase(a); /* File entries are sorted by the header offset, we should mostly * use __archive_read_consume to advance a read point to avoid * redundant data reading. */ offset = archive_filter_bytes(&a->archive, 0); if (offset < zip->entry->local_header_offset) __archive_read_consume(a, zip->entry->local_header_offset - offset); else if (offset != zip->entry->local_header_offset) { __archive_read_seek(a, zip->entry->local_header_offset, SEEK_SET); } zip->unconsumed = 0; r = zip_read_local_file_header(a, entry, zip); if (r != ARCHIVE_OK) return r; if (rsrc) { int ret2 = zip_read_mac_metadata(a, entry, rsrc); if (ret2 < ret) ret = ret2; } return (ret); } /* * We're going to seek for the next header anyway, so we don't * need to bother doing anything here. */ static int archive_read_format_zip_read_data_skip_seekable(struct archive_read *a) { struct zip *zip; zip = (struct zip *)(a->format->data); zip->unconsumed = 0; return (ARCHIVE_OK); } int archive_read_support_format_zip_seekable(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct zip *zip; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_zip_seekable"); zip = (struct zip *)calloc(1, sizeof(*zip)); if (zip == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate zip data"); return (ARCHIVE_FATAL); } #ifdef HAVE_COPYFILE_H /* Set this by default on Mac OS. */ zip->process_mac_extensions = 1; #endif /* * Until enough data has been read, we cannot tell about * any encrypted entries yet. */ zip->has_encrypted_entries = ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; zip->crc32func = real_crc32; r = __archive_read_register_format(a, zip, "zip", archive_read_format_zip_seekable_bid, archive_read_format_zip_options, archive_read_format_zip_seekable_read_header, archive_read_format_zip_read_data, archive_read_format_zip_read_data_skip_seekable, NULL, archive_read_format_zip_cleanup, archive_read_support_format_zip_capabilities_seekable, archive_read_format_zip_has_encrypted_entries); if (r != ARCHIVE_OK) free(zip); return (ARCHIVE_OK); } /*# vim:set noet:*/ diff --git a/contrib/libarchive/libarchive/cpio.5 b/contrib/libarchive/libarchive/cpio.5 index 837a45692e3b..c71018b1996e 100644 --- a/contrib/libarchive/libarchive/cpio.5 +++ b/contrib/libarchive/libarchive/cpio.5 @@ -1,391 +1,391 @@ .\" Copyright (c) 2007 Tim Kientzle .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd December 23, 2011 .Dt CPIO 5 .Os .Sh NAME .Nm cpio .Nd format of cpio archive files .Sh DESCRIPTION The .Nm archive format collects any number of files, directories, and other file system objects (symbolic links, device nodes, etc.) into a single stream of bytes. .Ss General Format Each file system object in a .Nm archive comprises a header record with basic numeric metadata followed by the full pathname of the entry and the file data. The header record stores a series of integer values that generally follow the fields in .Va struct stat . (See .Xr stat 2 for details.) The variants differ primarily in how they store those integers (binary, octal, or hexadecimal). The header is followed by the pathname of the entry (the length of the pathname is stored in the header) and any file data. The end of the archive is indicated by a special record with the pathname .Dq TRAILER!!! . .Ss PWB format The PWB binary .Nm format is the original format, when cpio was introduced as part of the Programmer's Work Bench system, a variant of 6th Edition UNIX. It stores numbers as 2-byte and 4-byte binary values. Each entry begins with a header in the following format: .Pp .Bd -literal -offset indent struct header_pwb_cpio { short h_magic; short h_dev; short h_ino; short h_mode; short h_uid; short h_gid; short h_nlink; short h_majmin; long h_mtime; short h_namesize; long h_filesize; }; .Ed .Pp The .Va short fields here are 16-bit integer values, while the .Va long fields are 32 bit integers. Since PWB UNIX, like the 6th Edition UNIX it was based on, only ran on PDP-11 computers, they are in PDP-endian format, which has little-endian shorts, and big-endian longs. That is, the long integer whose hexadecimal representation is 0x12345678 would be stored in four successive bytes as 0x34, 0x12, 0x78, 0x56. The fields are as follows: .Bl -tag -width indent .It Va h_magic The integer value octal 070707. .It Va h_dev , Va h_ino The device and inode numbers from the disk. These are used by programs that read .Nm archives to determine when two entries refer to the same file. Programs that synthesize .Nm archives should be careful to set these to distinct values for each entry. .It Va h_mode The mode specifies both the regular permissions and the file type, and it also holds a couple of bits that are irrelevant to the cpio format, because the field is actually a raw copy of the mode field in the inode representing the file. These are the IALLOC flag, which shows that the inode entry is in use, and the ILARG flag, which shows that the file it represents is large enough to have indirect blocks pointers in the inode. The mode is decoded as follows: .Pp .Bl -tag -width "MMMMMMM" -compact .It 0100000 IALLOC flag - irrelevant to cpio. .It 0060000 This masks the file type bits. .It 0040000 File type value for directories. .It 0020000 File type value for character special devices. .It 0060000 File type value for block special devices. .It 0010000 ILARG flag - irrelevant to cpio. .It 0004000 SUID bit. .It 0002000 SGID bit. .It 0001000 Sticky bit. .It 0000777 The lower 9 bits specify read/write/execute permissions for world, group, and user following standard POSIX conventions. .El .It Va h_uid , Va h_gid The numeric user id and group id of the owner. .It Va h_nlink The number of links to this file. Directories always have a value of at least two here. Note that hardlinked files include file data with every copy in the archive. .It Va h_majmin For block special and character special entries, this field contains the associated device number, with the major number in the high byte, and the minor number in the low byte. For all other entry types, it should be set to zero by writers and ignored by readers. .It Va h_mtime Modification time of the file, indicated as the number of seconds since the start of the epoch, 00:00:00 UTC January 1, 1970. .It Va h_namesize The number of bytes in the pathname that follows the header. This count includes the trailing NUL byte. .It Va h_filesize The size of the file. Note that this archive format is limited to 16 megabyte file sizes, because PWB UNIX, like 6th Edition, only used an unsigned 24 bit integer for the file size internally. .El .Pp The pathname immediately follows the fixed header. If .Cm h_namesize is odd, an additional NUL byte is added after the pathname. The file data is then appended, again with an additional NUL appended if needed to get the next header at an even offset. .Pp Hardlinked files are not given special treatment; the full file contents are included with each copy of the file. .Ss New Binary Format The new binary .Nm format showed up when cpio was adopted into late 7th Edition UNIX. It is exactly like the PWB binary format, described above, except for three changes: .Pp First, UNIX now ran on more than one hardware type, so the endianness of 16 bit integers must be determined by observing the magic number at the start of the header. The 32 bit integers are still always stored with the most significant word first, though, so each of those two, in the struct shown above, was stored as an array of two 16 bit integers, in the traditional order. Those 16 bit integers, like all the others in the struct, were accessed using a macro that byte swapped them if necessary. .Pp Next, 7th Edition had more file types to store, and the IALLOC and ILARG flag bits were re-purposed to accommodate these. The revised use of the various bits is as follows: .Pp .Bl -tag -width "MMMMMMM" -compact .It 0170000 This masks the file type bits. .It 0140000 File type value for sockets. .It 0120000 File type value for symbolic links. For symbolic links, the link body is stored as file data. .It 0100000 File type value for regular files. .It 0060000 File type value for block special devices. .It 0040000 File type value for directories. .It 0020000 File type value for character special devices. .It 0010000 File type value for named pipes or FIFOs. .It 0004000 SUID bit. .It 0002000 SGID bit. .It 0001000 Sticky bit. .It 0000777 The lower 9 bits specify read/write/execute permissions for world, group, and user following standard POSIX conventions. .El .Pp Finally, the file size field now represents a signed 32 bit integer in the underlying file system, so the maximum file size has increased to 2 gigabytes. .Pp Note that there is no obvious way to tell which of the two binary formats an archive uses, other than to see which one makes more sense. The typical error scenario is that a PWB format archive unpacked as if it were in the new format will create named sockets instead of directories, and then fail to unpack files that should go in those directories. Running .Va bsdcpio -itv on an unknown archive will make it obvious which it is: if it's PWB format, directories will be listed with an 's' instead of a 'd' as the first character of the mode string, and the larger files will have a '?' in that position. .Ss Portable ASCII Format .St -susv2 standardized an ASCII variant that is portable across all platforms. It is commonly known as the .Dq old character format or as the .Dq odc format. It stores the same numeric fields as the old binary format, but represents them as 6-character or 11-character octal values. .Pp .Bd -literal -offset indent struct cpio_odc_header { char c_magic[6]; char c_dev[6]; char c_ino[6]; char c_mode[6]; char c_uid[6]; char c_gid[6]; char c_nlink[6]; char c_rdev[6]; char c_mtime[11]; char c_namesize[6]; char c_filesize[11]; }; .Ed .Pp The fields are identical to those in the new binary format. The name and file body follow the fixed header. Unlike the binary formats, there is no additional padding after the pathname or file contents. If the files being archived are themselves entirely ASCII, then the resulting archive will be entirely ASCII, except for the NUL byte that terminates the name field. .Ss New ASCII Format The "new" ASCII format uses 8-byte hexadecimal fields for all numbers and separates device numbers into separate fields for major and minor numbers. .Pp .Bd -literal -offset indent struct cpio_newc_header { char c_magic[6]; char c_ino[8]; char c_mode[8]; char c_uid[8]; char c_gid[8]; char c_nlink[8]; char c_mtime[8]; char c_filesize[8]; char c_devmajor[8]; char c_devminor[8]; char c_rdevmajor[8]; char c_rdevminor[8]; char c_namesize[8]; char c_check[8]; }; .Ed .Pp Except as specified below, the fields here match those specified for the new binary format above. .Bl -tag -width indent .It Va magic The string .Dq 070701 . .It Va check This field is always set to zero by writers and ignored by readers. See the next section for more details. .El .Pp The pathname is followed by NUL bytes so that the total size of the fixed header plus pathname is a multiple of four. Likewise, the file data is padded to a multiple of four bytes. Note that this format supports only 4 gigabyte files (unlike the older ASCII format, which supports 8 gigabyte files). .Pp In this format, hardlinked files are handled by setting the filesize to zero for each entry except the first one that appears in the archive. .Ss New CRC Format The CRC format is identical to the new ASCII format described in the previous section except that the magic field is set to .Dq 070702 and the .Va check field is set to the sum of all bytes in the file data. This sum is computed treating all bytes as unsigned values and using unsigned arithmetic. Only the least-significant 32 bits of the sum are stored. .Ss HP variants The .Nm cpio implementation distributed with HPUX used XXXX but stored device numbers differently XXX. .Ss Other Extensions and Variants Sun Solaris uses additional file types to store extended file data, including ACLs and extended attributes, as special entries in cpio archives. .Pp XXX Others? XXX .Sh SEE ALSO .Xr cpio 1 , .Xr tar 5 .Sh STANDARDS The .Nm cpio utility is no longer a part of POSIX or the Single Unix Standard. It last appeared in .St -susv2 . It has been supplanted in subsequent standards by .Xr pax 1 . The portable ASCII format is currently part of the specification for the .Xr pax 1 utility. .Sh HISTORY The original cpio utility was written by Dick Haight while working in AT&T's Unix Support Group. It appeared in 1977 as part of PWB/UNIX 1.0, the .Dq Programmer's Work Bench derived from -.At 6th Edition UNIX +.At v6 that was used internally at AT&T. Both the new binary and old character formats were in use by 1980, according to the System III source released by SCO under their .Dq Ancient Unix license. The character format was adopted as part of .St -p1003.1-88 . XXX when did "newc" appear? Who invented it? When did HP come out with their variant? When did Sun introduce ACLs and extended attributes? XXX .Sh BUGS The .Dq CRC format is mis-named, as it uses a simple checksum and not a cyclic redundancy check. .Pp The binary formats are limited to 16 bits for user id, group id, device, and inode numbers. They are limited to 16 megabyte and 2 gigabyte file sizes for the older and newer variants, respectively. .Pp The old ASCII format is limited to 18 bits for the user id, group id, device, and inode numbers. It is limited to 8 gigabyte file sizes. .Pp The new ASCII format is limited to 4 gigabyte file sizes. .Pp None of the cpio formats store user or group names, which are essential when moving files between systems with dissimilar user or group numbering. .Pp Especially when writing older cpio variants, it may be necessary to map actual device/inode values to synthesized values that fit the available fields. With very large filesystems, this may be necessary even for the newer formats. diff --git a/lib/libarchive/config_freebsd.h b/lib/libarchive/config_freebsd.h index 0e8ac64eba47..20230ca26adf 100644 --- a/lib/libarchive/config_freebsd.h +++ b/lib/libarchive/config_freebsd.h @@ -1,262 +1,263 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2007 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #define __LIBARCHIVE_CONFIG_H_INCLUDED 1 #include /* FreeBSD 5.0 and later has ACL and extattr support. */ #if __FreeBSD__ > 4 #define ARCHIVE_ACL_FREEBSD 1 #define ARCHIVE_XATTR_FREEBSD 1 #define HAVE_ACL_GET_PERM_NP 1 #define HAVE_ARC4RANDOM_BUF 1 #define HAVE_STRUCT_XVFSCONF 1 #define HAVE_SYS_ACL_H 1 #define HAVE_SYS_EXTATTR_H 1 #if __FreeBSD__ > 7 /* FreeBSD 8.0 and later has NFSv4 ACL support */ #define ARCHIVE_ACL_FREEBSD_NFS4 1 #define HAVE_ACL_GET_LINK_NP 1 #define HAVE_ACL_IS_TRIVIAL_NP 1 #define HAVE_ACL_SET_LINK_NP 1 #endif /* __FreeBSD__ > 7 */ #endif /* __FreeBSD__ > 4 */ #ifdef WITH_OPENSSL #define HAVE_LIBCRYPTO 1 #define HAVE_OPENSSL_EVP_H 1 #define HAVE_OPENSSL_MD5_H 1 #define HAVE_OPENSSL_RIPEMD_H 1 #define HAVE_OPENSSL_SHA_H 1 #define HAVE_OPENSSL_SHA256_INIT 1 #define HAVE_OPENSSL_SHA384_INIT 1 #define HAVE_OPENSSL_SHA512_INIT 1 #define HAVE_PKCS5_PBKDF2_HMAC_SHA1 1 #define HAVE_SHA256 1 #define HAVE_SHA384 1 #define HAVE_SHA512 1 #else #define HAVE_LIBMD 1 #define HAVE_MD5_H 1 #define HAVE_MD5INIT 1 #define HAVE_RIPEMD_H 1 #define HAVE_SHA_H 1 #define HAVE_SHA1 1 #define HAVE_SHA1_INIT 1 #define HAVE_SHA256 1 #define HAVE_SHA256_H 1 #define HAVE_SHA256_INIT 1 #define HAVE_SHA512 1 #define HAVE_SHA512_H 1 #define HAVE_SHA512_INIT 1 #endif #define HAVE_BSDXML_H 1 #define HAVE_BZLIB_H 1 #define HAVE_CHFLAGS 1 #define HAVE_CHOWN 1 #define HAVE_CHROOT 1 #define HAVE_CTIME_R 1 #define HAVE_CTYPE_H 1 #define HAVE_DECL_EXTATTR_NAMESPACE_USER 1 #define HAVE_DECL_INT32_MAX 1 #define HAVE_DECL_INT32_MIN 1 #define HAVE_DECL_INT64_MAX 1 #define HAVE_DECL_INT64_MIN 1 #define HAVE_DECL_INTMAX_MAX 1 #define HAVE_DECL_INTMAX_MIN 1 #define HAVE_DECL_SIZE_MAX 1 #define HAVE_DECL_SSIZE_MAX 1 #define HAVE_DECL_STRERROR_R 1 #define HAVE_DECL_UINT32_MAX 1 #define HAVE_DECL_UINT64_MAX 1 #define HAVE_DECL_UINTMAX_MAX 1 #define HAVE_DIRENT_H 1 +#define HAVE_DIRFD 1 #define HAVE_DLFCN_H 1 #define HAVE_D_MD_ORDER 1 #define HAVE_EFTYPE 1 #define HAVE_EILSEQ 1 #define HAVE_ERRNO_H 1 #define HAVE_FCHDIR 1 #define HAVE_FCHFLAGS 1 #define HAVE_FCHMOD 1 #define HAVE_FCHOWN 1 #define HAVE_FCNTL 1 #define HAVE_FCNTL_H 1 #define HAVE_FDOPENDIR 1 #define HAVE_FORK 1 #define HAVE_FSEEKO 1 #define HAVE_FSTAT 1 #define HAVE_FSTATAT 1 #define HAVE_FSTATFS 1 #define HAVE_FSTATVFS 1 #define HAVE_FTRUNCATE 1 #define HAVE_FUTIMES 1 #define HAVE_FUTIMESAT 1 #define HAVE_GETEUID 1 #define HAVE_GETGRGID_R 1 #define HAVE_GETGRNAM_R 1 #define HAVE_GETPID 1 #define HAVE_GETPWNAM_R 1 #define HAVE_GETPWUID_R 1 #define HAVE_GETVFSBYNAME 1 #define HAVE_GMTIME_R 1 #define HAVE_GRP_H 1 #define HAVE_INTMAX_T 1 #define HAVE_INTTYPES_H 1 #define HAVE_LANGINFO_H 1 #define HAVE_LCHFLAGS 1 #define HAVE_LCHMOD 1 #define HAVE_LCHOWN 1 #define HAVE_LIBZ 1 #define HAVE_LIMITS_H 1 #define HAVE_LINK 1 #define HAVE_LINKAT 1 #define HAVE_LOCALE_H 1 #define HAVE_LOCALTIME_R 1 #define HAVE_LONG_LONG_INT 1 #define HAVE_LSTAT 1 #define HAVE_LUTIMES 1 #define HAVE_MBRTOWC 1 #define HAVE_MEMMOVE 1 #define HAVE_MEMORY_H 1 #define HAVE_MEMSET 1 #define HAVE_MKDIR 1 #define HAVE_MKFIFO 1 #define HAVE_MKNOD 1 #define HAVE_MKSTEMP 1 #define HAVE_NL_LANGINFO 1 #define HAVE_OPENAT 1 #define HAVE_PATHS_H 1 #define HAVE_PIPE 1 #define HAVE_POLL 1 #define HAVE_POLL_H 1 #define HAVE_POSIX_SPAWNP 1 #define HAVE_PTHREAD_H 1 #define HAVE_PWD_H 1 #define HAVE_READDIR_R 1 #define HAVE_READLINK 1 #define HAVE_READLINKAT 1 #define HAVE_READPASSPHRASE 1 #define HAVE_READPASSPHRASE_H 1 #define HAVE_REGEX_H 1 #define HAVE_SELECT 1 #define HAVE_SETENV 1 #define HAVE_SETLOCALE 1 #define HAVE_SIGACTION 1 #define HAVE_SIGNAL_H 1 #define HAVE_SPAWN_H 1 #define HAVE_STATFS 1 #define HAVE_STATVFS 1 #define HAVE_STDARG_H 1 #define HAVE_STDINT_H 1 #define HAVE_STDLIB_H 1 #define HAVE_STRCHR 1 #define HAVE_STRDUP 1 #define HAVE_STRERROR 1 #define HAVE_STRERROR_R 1 #define HAVE_STRFTIME 1 #define HAVE_STRINGS_H 1 #define HAVE_STRING_H 1 #define HAVE_STRNLEN 1 #define HAVE_STRRCHR 1 #define HAVE_STRUCT_STATFS_F_NAMEMAX 1 #define HAVE_STRUCT_STAT_ST_BIRTHTIME 1 #define HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC 1 #define HAVE_STRUCT_STAT_ST_BLKSIZE 1 #define HAVE_STRUCT_STAT_ST_FLAGS 1 #define HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC 1 #define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1 #define HAVE_STRUCT_TM_TM_GMTOFF 1 #define HAVE_SYMLINK 1 #define HAVE_SYS_CDEFS_H 1 #define HAVE_SYS_IOCTL_H 1 #define HAVE_SYS_MOUNT_H 1 #define HAVE_SYS_PARAM_H 1 #define HAVE_SYS_POLL_H 1 #define HAVE_SYS_SELECT_H 1 #define HAVE_SYS_STATVFS_H 1 #define HAVE_SYS_STAT_H 1 #define HAVE_SYS_TIME_H 1 #define HAVE_SYS_TYPES_H 1 #define HAVE_SYS_UTSNAME_H 1 #define HAVE_SYS_WAIT_H 1 #define HAVE_TIMEGM 1 #define HAVE_TIME_H 1 #define HAVE_TZSET 1 #define HAVE_UINTMAX_T 1 #define HAVE_UNISTD_H 1 #define HAVE_UNLINKAT 1 #define HAVE_UNSETENV 1 #define HAVE_UNSIGNED_LONG_LONG 1 #define HAVE_UNSIGNED_LONG_LONG_INT 1 #define HAVE_UTIME 1 #define HAVE_UTIMES 1 #define HAVE_UTIME_H 1 #define HAVE_VFORK 1 #define HAVE_VPRINTF 1 #define HAVE_WCHAR_H 1 #define HAVE_WCHAR_T 1 #define HAVE_WCRTOMB 1 #define HAVE_WCSCMP 1 #define HAVE_WCSCPY 1 #define HAVE_WCSLEN 1 #define HAVE_WCTOMB 1 #define HAVE_WCTYPE_H 1 #define HAVE_WMEMCMP 1 #define HAVE_WMEMCPY 1 #define HAVE_WMEMMOVE 1 #define HAVE_ZLIB_H 1 #define TIME_WITH_SYS_TIME 1 #if __FreeBSD_version >= 1100056 #define HAVE_FUTIMENS 1 #define HAVE_UTIMENSAT 1 #endif /* FreeBSD 4 and earlier lack intmax_t/uintmax_t */ #if __FreeBSD__ < 5 #define intmax_t int64_t #define uintmax_t uint64_t #endif /* FreeBSD defines for archive_hash.h */ #ifdef WITH_OPENSSL #define ARCHIVE_CRYPTO_MD5_OPENSSL 1 #define ARCHIVE_CRYPTO_RMD160_OPENSSL 1 #define ARCHIVE_CRYPTO_SHA1_OPENSSL #define ARCHIVE_CRYPTO_SHA256_OPENSSL 1 #define ARCHIVE_CRYPTO_SHA384_OPENSSL 1 #define ARCHIVE_CRYPTO_SHA512_OPENSSL 1 #else #define ARCHIVE_CRYPTO_MD5_LIBMD 1 #define ARCHIVE_CRYPTO_SHA1_LIBMD 1 #define ARCHIVE_CRYPTO_SHA256_LIBMD 1 #define ARCHIVE_CRYPTO_SHA512_LIBMD 1 #endif