Index: head/contrib/libarchive/libarchive/archive_acl.c =================================================================== --- head/contrib/libarchive/libarchive/archive_acl.c (revision 342041) +++ head/contrib/libarchive/libarchive/archive_acl.c (revision 342042) @@ -1,2100 +1,2105 @@ /*- * Copyright (c) 2003-2010 Tim Kientzle * Copyright (c) 2016 Martin Matuska * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_LIMITS_H #include #endif #ifdef HAVE_WCHAR_H #include #endif #include "archive_acl_private.h" #include "archive_entry.h" #include "archive_private.h" #undef max #define max(a, b) ((a)>(b)?(a):(b)) #ifndef HAVE_WMEMCMP /* Good enough for simple equality testing, but not for sorting. */ #define wmemcmp(a,b,i) memcmp((a), (b), (i) * sizeof(wchar_t)) #endif static int acl_special(struct archive_acl *acl, int type, int permset, int tag); static struct archive_acl_entry *acl_new_entry(struct archive_acl *acl, int type, int permset, int tag, int id); static int archive_acl_add_entry_len_l(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name, size_t len, struct archive_string_conv *sc); static int archive_acl_text_want_type(struct archive_acl *acl, int flags); static ssize_t archive_acl_text_len(struct archive_acl *acl, int want_type, int flags, int wide, struct archive *a, struct archive_string_conv *sc); static int isint_w(const wchar_t *start, const wchar_t *end, int *result); static int ismode_w(const wchar_t *start, const wchar_t *end, int *result); static int is_nfs4_flags_w(const wchar_t *start, const wchar_t *end, int *result); static int is_nfs4_perms_w(const wchar_t *start, const wchar_t *end, int *result); static void next_field_w(const wchar_t **wp, const wchar_t **start, const wchar_t **end, wchar_t *sep); static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int type, int tag, int flags, const wchar_t *wname, int perm, int id); static void append_id_w(wchar_t **wp, int id); static int isint(const char *start, const char *end, int *result); static int ismode(const char *start, const char *end, int *result); static int is_nfs4_flags(const char *start, const char *end, int *result); static int is_nfs4_perms(const char *start, const char *end, int *result); static void next_field(const char **p, const char **start, const char **end, char *sep); static void append_entry(char **p, const char *prefix, int type, int tag, int flags, const char *name, int perm, int id); static void append_id(char **p, int id); static const struct { const int perm; const char c; const wchar_t wc; } nfsv4_acl_perm_map[] = { { ARCHIVE_ENTRY_ACL_READ_DATA | ARCHIVE_ENTRY_ACL_LIST_DIRECTORY, 'r', L'r' }, { ARCHIVE_ENTRY_ACL_WRITE_DATA | ARCHIVE_ENTRY_ACL_ADD_FILE, 'w', L'w' }, { ARCHIVE_ENTRY_ACL_EXECUTE, 'x', L'x' }, { ARCHIVE_ENTRY_ACL_APPEND_DATA | ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY, 'p', L'p' }, { ARCHIVE_ENTRY_ACL_DELETE, 'd', L'd' }, { ARCHIVE_ENTRY_ACL_DELETE_CHILD, 'D', L'D' }, { ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, 'a', L'a' }, { ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, 'A', L'A' }, { ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, 'R', L'R' }, { ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, 'W', L'W' }, { ARCHIVE_ENTRY_ACL_READ_ACL, 'c', L'c' }, { ARCHIVE_ENTRY_ACL_WRITE_ACL, 'C', L'C' }, { ARCHIVE_ENTRY_ACL_WRITE_OWNER, 'o', L'o' }, { ARCHIVE_ENTRY_ACL_SYNCHRONIZE, 's', L's' } }; static const int nfsv4_acl_perm_map_size = (int)(sizeof(nfsv4_acl_perm_map) / sizeof(nfsv4_acl_perm_map[0])); static const struct { const int perm; const char c; const wchar_t wc; } nfsv4_acl_flag_map[] = { { ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, 'f', L'f' }, { ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, 'd', L'd' }, { ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, 'i', L'i' }, { ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, 'n', L'n' }, { ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS, 'S', L'S' }, { ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS, 'F', L'F' }, { ARCHIVE_ENTRY_ACL_ENTRY_INHERITED, 'I', L'I' } }; static const int nfsv4_acl_flag_map_size = (int)(sizeof(nfsv4_acl_flag_map) / sizeof(nfsv4_acl_flag_map[0])); void archive_acl_clear(struct archive_acl *acl) { struct archive_acl_entry *ap; while (acl->acl_head != NULL) { ap = acl->acl_head->next; archive_mstring_clean(&acl->acl_head->name); free(acl->acl_head); acl->acl_head = ap; } if (acl->acl_text_w != NULL) { free(acl->acl_text_w); acl->acl_text_w = NULL; } if (acl->acl_text != NULL) { free(acl->acl_text); acl->acl_text = NULL; } acl->acl_p = NULL; acl->acl_types = 0; acl->acl_state = 0; /* Not counting. */ } void archive_acl_copy(struct archive_acl *dest, struct archive_acl *src) { struct archive_acl_entry *ap, *ap2; archive_acl_clear(dest); dest->mode = src->mode; ap = src->acl_head; while (ap != NULL) { ap2 = acl_new_entry(dest, ap->type, ap->permset, ap->tag, ap->id); if (ap2 != NULL) archive_mstring_copy(&ap2->name, &ap->name); ap = ap->next; } } int archive_acl_add_entry(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name) { struct archive_acl_entry *ap; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != '\0') archive_mstring_copy_mbs(&ap->name, name); else archive_mstring_clean(&ap->name); return ARCHIVE_OK; } int archive_acl_add_entry_w_len(struct archive_acl *acl, int type, int permset, int tag, int id, const wchar_t *name, size_t len) { struct archive_acl_entry *ap; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != L'\0' && len > 0) archive_mstring_copy_wcs_len(&ap->name, name, len); else archive_mstring_clean(&ap->name); return ARCHIVE_OK; } static int archive_acl_add_entry_len_l(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name, size_t len, struct archive_string_conv *sc) { struct archive_acl_entry *ap; int r; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != '\0' && len > 0) { r = archive_mstring_copy_mbs_len_l(&ap->name, name, len, sc); } else { r = 0; archive_mstring_clean(&ap->name); } if (r == 0) return (ARCHIVE_OK); else if (errno == ENOMEM) return (ARCHIVE_FATAL); else return (ARCHIVE_WARN); } /* * If this ACL entry is part of the standard POSIX permissions set, * store the permissions in the stat structure and return zero. */ static int acl_special(struct archive_acl *acl, int type, int permset, int tag) { if (type == ARCHIVE_ENTRY_ACL_TYPE_ACCESS && ((permset & ~007) == 0)) { switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: acl->mode &= ~0700; acl->mode |= (permset & 7) << 6; return (0); case ARCHIVE_ENTRY_ACL_GROUP_OBJ: acl->mode &= ~0070; acl->mode |= (permset & 7) << 3; return (0); case ARCHIVE_ENTRY_ACL_OTHER: acl->mode &= ~0007; acl->mode |= permset & 7; return (0); } } return (1); } /* * Allocate and populate a new ACL entry with everything but the * name. */ static struct archive_acl_entry * acl_new_entry(struct archive_acl *acl, int type, int permset, int tag, int id) { struct archive_acl_entry *ap, *aq; /* Type argument must be a valid NFS4 or POSIX.1e type. * The type must agree with anything already set and * the permset must be compatible. */ if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { if (acl->acl_types & ~ARCHIVE_ENTRY_ACL_TYPE_NFS4) { return (NULL); } if (permset & ~(ARCHIVE_ENTRY_ACL_PERMS_NFS4 | ARCHIVE_ENTRY_ACL_INHERITANCE_NFS4)) { return (NULL); } } else if (type & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { if (acl->acl_types & ~ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { return (NULL); } if (permset & ~ARCHIVE_ENTRY_ACL_PERMS_POSIX1E) { return (NULL); } } else { return (NULL); } /* Verify the tag is valid and compatible with NFS4 or POSIX.1e. */ switch (tag) { case ARCHIVE_ENTRY_ACL_USER: case ARCHIVE_ENTRY_ACL_USER_OBJ: case ARCHIVE_ENTRY_ACL_GROUP: case ARCHIVE_ENTRY_ACL_GROUP_OBJ: /* Tags valid in both NFS4 and POSIX.1e */ break; case ARCHIVE_ENTRY_ACL_MASK: case ARCHIVE_ENTRY_ACL_OTHER: /* Tags valid only in POSIX.1e. */ if (type & ~ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { return (NULL); } break; case ARCHIVE_ENTRY_ACL_EVERYONE: /* Tags valid only in NFS4. */ if (type & ~ARCHIVE_ENTRY_ACL_TYPE_NFS4) { return (NULL); } break; default: /* No other values are valid. */ return (NULL); } if (acl->acl_text_w != NULL) { free(acl->acl_text_w); acl->acl_text_w = NULL; } if (acl->acl_text != NULL) { free(acl->acl_text); acl->acl_text = NULL; } /* * If there's a matching entry already in the list, overwrite it. * NFSv4 entries may be repeated and are not overwritten. * * TODO: compare names of no id is provided (needs more rework) */ ap = acl->acl_head; aq = NULL; while (ap != NULL) { if (((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0) && ap->type == type && ap->tag == tag && ap->id == id) { if (id != -1 || (tag != ARCHIVE_ENTRY_ACL_USER && tag != ARCHIVE_ENTRY_ACL_GROUP)) { ap->permset = permset; return (ap); } } aq = ap; ap = ap->next; } /* Add a new entry to the end of the list. */ ap = (struct archive_acl_entry *)calloc(1, sizeof(*ap)); if (ap == NULL) return (NULL); if (aq == NULL) acl->acl_head = ap; else aq->next = ap; ap->type = type; ap->tag = tag; ap->id = id; ap->permset = permset; acl->acl_types |= type; return (ap); } /* * Return a count of entries matching "want_type". */ int archive_acl_count(struct archive_acl *acl, int want_type) { int count; struct archive_acl_entry *ap; count = 0; ap = acl->acl_head; while (ap != NULL) { if ((ap->type & want_type) != 0) count++; ap = ap->next; } if (count > 0 && ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) count += 3; return (count); } /* * Return a bitmask of stored ACL types in an ACL list */ int archive_acl_types(struct archive_acl *acl) { return (acl->acl_types); } /* * Prepare for reading entries from the ACL data. Returns a count * of entries matching "want_type", or zero if there are no * non-extended ACL entries of that type. */ int archive_acl_reset(struct archive_acl *acl, int want_type) { int count, cutoff; count = archive_acl_count(acl, want_type); /* * If the only entries are the three standard ones, * then don't return any ACL data. (In this case, * client can just use chmod(2) to set permissions.) */ if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) cutoff = 3; else cutoff = 0; if (count > cutoff) acl->acl_state = ARCHIVE_ENTRY_ACL_USER_OBJ; else acl->acl_state = 0; acl->acl_p = acl->acl_head; return (count); } /* * Return the next ACL entry in the list. Fake entries for the * standard permissions and include them in the returned list. */ int archive_acl_next(struct archive *a, struct archive_acl *acl, int want_type, int *type, int *permset, int *tag, int *id, const char **name) { *name = NULL; *id = -1; /* * The acl_state is either zero (no entries available), -1 * (reading from list), or an entry type (retrieve that type * from ae_stat.aest_mode). */ if (acl->acl_state == 0) return (ARCHIVE_WARN); /* The first three access entries are special. */ if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { switch (acl->acl_state) { case ARCHIVE_ENTRY_ACL_USER_OBJ: *permset = (acl->mode >> 6) & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_USER_OBJ; acl->acl_state = ARCHIVE_ENTRY_ACL_GROUP_OBJ; return (ARCHIVE_OK); case ARCHIVE_ENTRY_ACL_GROUP_OBJ: *permset = (acl->mode >> 3) & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; acl->acl_state = ARCHIVE_ENTRY_ACL_OTHER; return (ARCHIVE_OK); case ARCHIVE_ENTRY_ACL_OTHER: *permset = acl->mode & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_OTHER; acl->acl_state = -1; acl->acl_p = acl->acl_head; return (ARCHIVE_OK); default: break; } } while (acl->acl_p != NULL && (acl->acl_p->type & want_type) == 0) acl->acl_p = acl->acl_p->next; if (acl->acl_p == NULL) { acl->acl_state = 0; *type = 0; *permset = 0; *tag = 0; *id = -1; *name = NULL; return (ARCHIVE_EOF); /* End of ACL entries. */ } *type = acl->acl_p->type; *permset = acl->acl_p->permset; *tag = acl->acl_p->tag; *id = acl->acl_p->id; if (archive_mstring_get_mbs(a, &acl->acl_p->name, name) != 0) { if (errno == ENOMEM) return (ARCHIVE_FATAL); *name = NULL; } acl->acl_p = acl->acl_p->next; return (ARCHIVE_OK); } /* * Determine what type of ACL do we want */ static int archive_acl_text_want_type(struct archive_acl *acl, int flags) { int want_type; /* Check if ACL is NFSv4 */ if ((acl->acl_types & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { /* NFSv4 should never mix with POSIX.1e */ if ((acl->acl_types & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) != 0) return (0); else return (ARCHIVE_ENTRY_ACL_TYPE_NFS4); } /* Now deal with POSIX.1e ACLs */ want_type = 0; if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) want_type |= ARCHIVE_ENTRY_ACL_TYPE_ACCESS; if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) want_type |= ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; /* By default we want both access and default ACLs */ if (want_type == 0) return (ARCHIVE_ENTRY_ACL_TYPE_POSIX1E); return (want_type); } /* * Calculate ACL text string length */ static ssize_t archive_acl_text_len(struct archive_acl *acl, int want_type, int flags, int wide, struct archive *a, struct archive_string_conv *sc) { struct archive_acl_entry *ap; const char *name; const wchar_t *wname; int count, idlen, tmp, r; ssize_t length; size_t len; count = 0; length = 0; for (ap = acl->acl_head; ap != NULL; ap = ap->next) { if ((ap->type & want_type) == 0) continue; /* * Filemode-mapping ACL entries are stored exclusively in * ap->mode so they should not be in the list */ if ((ap->type == ARCHIVE_ENTRY_ACL_TYPE_ACCESS) && (ap->tag == ARCHIVE_ENTRY_ACL_USER_OBJ || ap->tag == ARCHIVE_ENTRY_ACL_GROUP_OBJ || ap->tag == ARCHIVE_ENTRY_ACL_OTHER)) continue; count++; if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0 && (ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) length += 8; /* "default:" */ switch (ap->tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: if (want_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) { length += 6; /* "owner@" */ break; } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_USER: case ARCHIVE_ENTRY_ACL_MASK: length += 4; /* "user", "mask" */ break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: if (want_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) { length += 6; /* "group@" */ break; } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_GROUP: case ARCHIVE_ENTRY_ACL_OTHER: length += 5; /* "group", "other" */ break; case ARCHIVE_ENTRY_ACL_EVERYONE: length += 9; /* "everyone@" */ break; } length += 1; /* colon after tag */ if (ap->tag == ARCHIVE_ENTRY_ACL_USER || ap->tag == ARCHIVE_ENTRY_ACL_GROUP) { if (wide) { r = archive_mstring_get_wcs(a, &ap->name, &wname); if (r == 0 && wname != NULL) length += wcslen(wname); else if (r < 0 && errno == ENOMEM) return (0); else length += sizeof(uid_t) * 3 + 1; } else { r = archive_mstring_get_mbs_l(&ap->name, &name, &len, sc); if (r != 0) return (0); if (len > 0 && name != NULL) length += len; else length += sizeof(uid_t) * 3 + 1; } length += 1; /* colon after user or group name */ } else if (want_type != ARCHIVE_ENTRY_ACL_TYPE_NFS4) length += 1; /* 2nd colon empty user,group or other */ if (((flags & ARCHIVE_ENTRY_ACL_STYLE_SOLARIS) != 0) && ((want_type & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) != 0) && (ap->tag == ARCHIVE_ENTRY_ACL_OTHER || ap->tag == ARCHIVE_ENTRY_ACL_MASK)) { /* Solaris has no colon after other: and mask: */ length = length - 1; } if (want_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) { /* rwxpdDaARWcCos:fdinSFI:deny */ length += 27; if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_DENY) == 0) length += 1; /* allow, alarm, audit */ } else length += 3; /* rwx */ if ((ap->tag == ARCHIVE_ENTRY_ACL_USER || ap->tag == ARCHIVE_ENTRY_ACL_GROUP) && (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) != 0) { length += 1; /* colon */ /* ID digit count */ idlen = 1; tmp = ap->id; while (tmp > 9) { tmp = tmp / 10; idlen++; } length += idlen; } length ++; /* entry separator */ } /* Add filemode-mapping access entries to the length */ if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { if ((flags & ARCHIVE_ENTRY_ACL_STYLE_SOLARIS) != 0) { /* "user::rwx\ngroup::rwx\nother:rwx\n" */ length += 31; } else { /* "user::rwx\ngroup::rwx\nother::rwx\n" */ length += 32; } } else if (count == 0) return (0); /* The terminating character is included in count */ return (length); } /* * Generate a wide text version of the ACL. The flags parameter controls * the type and style of the generated ACL. */ wchar_t * archive_acl_to_text_w(struct archive_acl *acl, ssize_t *text_len, int flags, struct archive *a) { int count; ssize_t length; size_t len; const wchar_t *wname; const wchar_t *prefix; wchar_t separator; struct archive_acl_entry *ap; int id, r, want_type; wchar_t *wp, *ws; want_type = archive_acl_text_want_type(acl, flags); /* Both NFSv4 and POSIX.1 types found */ if (want_type == 0) return (NULL); if (want_type == ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) flags |= ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT; length = archive_acl_text_len(acl, want_type, flags, 1, a, NULL); if (length == 0) return (NULL); if (flags & ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA) separator = L','; else separator = L'\n'; /* Now, allocate the string and actually populate it. */ wp = ws = (wchar_t *)malloc(length * sizeof(wchar_t)); if (wp == NULL) { if (errno == ENOMEM) __archive_errx(1, "No memory"); return (NULL); } count = 0; if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_USER_OBJ, flags, NULL, acl->mode & 0700, -1); *wp++ = separator; append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_GROUP_OBJ, flags, NULL, acl->mode & 0070, -1); *wp++ = separator; append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_OTHER, flags, NULL, acl->mode & 0007, -1); count += 3; } for (ap = acl->acl_head; ap != NULL; ap = ap->next) { if ((ap->type & want_type) == 0) continue; /* * Filemode-mapping ACL entries are stored exclusively in * ap->mode so they should not be in the list */ if ((ap->type == ARCHIVE_ENTRY_ACL_TYPE_ACCESS) && (ap->tag == ARCHIVE_ENTRY_ACL_USER_OBJ || ap->tag == ARCHIVE_ENTRY_ACL_GROUP_OBJ || ap->tag == ARCHIVE_ENTRY_ACL_OTHER)) continue; if (ap->type == ARCHIVE_ENTRY_ACL_TYPE_DEFAULT && (flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) != 0) prefix = L"default:"; else prefix = NULL; r = archive_mstring_get_wcs(a, &ap->name, &wname); if (r == 0) { if (count > 0) *wp++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; append_entry_w(&wp, prefix, ap->type, ap->tag, flags, wname, ap->permset, id); count++; } else if (r < 0 && errno == ENOMEM) { free(ws); return (NULL); } } /* Add terminating character */ *wp++ = L'\0'; len = wcslen(ws); if ((ssize_t)len > (length - 1)) __archive_errx(1, "Buffer overrun"); if (text_len != NULL) *text_len = len; return (ws); } static void append_id_w(wchar_t **wp, int id) { if (id < 0) id = 0; if (id > 9) append_id_w(wp, id / 10); *(*wp)++ = L"0123456789"[id % 10]; } static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int type, int tag, int flags, const wchar_t *wname, int perm, int id) { int i; if (prefix != NULL) { wcscpy(*wp, prefix); *wp += wcslen(*wp); } switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: wname = NULL; id = -1; if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { wcscpy(*wp, L"owner@"); break; } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_USER: wcscpy(*wp, L"user"); break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: wname = NULL; id = -1; if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { wcscpy(*wp, L"group@"); break; } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_GROUP: wcscpy(*wp, L"group"); break; case ARCHIVE_ENTRY_ACL_MASK: wcscpy(*wp, L"mask"); wname = NULL; id = -1; break; case ARCHIVE_ENTRY_ACL_OTHER: wcscpy(*wp, L"other"); wname = NULL; id = -1; break; case ARCHIVE_ENTRY_ACL_EVERYONE: wcscpy(*wp, L"everyone@"); wname = NULL; id = -1; break; } *wp += wcslen(*wp); *(*wp)++ = L':'; if (((type & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) != 0) || tag == ARCHIVE_ENTRY_ACL_USER || tag == ARCHIVE_ENTRY_ACL_GROUP) { if (wname != NULL) { wcscpy(*wp, wname); *wp += wcslen(*wp); } else if (tag == ARCHIVE_ENTRY_ACL_USER || tag == ARCHIVE_ENTRY_ACL_GROUP) { append_id_w(wp, id); if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0) id = -1; } /* Solaris style has no second colon after other and mask */ if (((flags & ARCHIVE_ENTRY_ACL_STYLE_SOLARIS) == 0) || (tag != ARCHIVE_ENTRY_ACL_OTHER && tag != ARCHIVE_ENTRY_ACL_MASK)) *(*wp)++ = L':'; } if ((type & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) != 0) { /* POSIX.1e ACL perms */ *(*wp)++ = (perm & 0444) ? L'r' : L'-'; *(*wp)++ = (perm & 0222) ? L'w' : L'-'; *(*wp)++ = (perm & 0111) ? L'x' : L'-'; } else { /* NFSv4 ACL perms */ for (i = 0; i < nfsv4_acl_perm_map_size; i++) { if (perm & nfsv4_acl_perm_map[i].perm) *(*wp)++ = nfsv4_acl_perm_map[i].wc; else if ((flags & ARCHIVE_ENTRY_ACL_STYLE_COMPACT) == 0) *(*wp)++ = L'-'; } *(*wp)++ = L':'; for (i = 0; i < nfsv4_acl_flag_map_size; i++) { if (perm & nfsv4_acl_flag_map[i].perm) *(*wp)++ = nfsv4_acl_flag_map[i].wc; else if ((flags & ARCHIVE_ENTRY_ACL_STYLE_COMPACT) == 0) *(*wp)++ = L'-'; } *(*wp)++ = L':'; switch (type) { case ARCHIVE_ENTRY_ACL_TYPE_ALLOW: wcscpy(*wp, L"allow"); break; case ARCHIVE_ENTRY_ACL_TYPE_DENY: wcscpy(*wp, L"deny"); break; case ARCHIVE_ENTRY_ACL_TYPE_AUDIT: wcscpy(*wp, L"audit"); break; case ARCHIVE_ENTRY_ACL_TYPE_ALARM: wcscpy(*wp, L"alarm"); break; default: break; } *wp += wcslen(*wp); } if (id != -1) { *(*wp)++ = L':'; append_id_w(wp, id); } } /* * Generate a text version of the ACL. The flags parameter controls * the type and style of the generated ACL. */ char * archive_acl_to_text_l(struct archive_acl *acl, ssize_t *text_len, int flags, struct archive_string_conv *sc) { int count; ssize_t length; size_t len; const char *name; const char *prefix; char separator; struct archive_acl_entry *ap; int id, r, want_type; char *p, *s; want_type = archive_acl_text_want_type(acl, flags); /* Both NFSv4 and POSIX.1 types found */ if (want_type == 0) return (NULL); if (want_type == ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) flags |= ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT; length = archive_acl_text_len(acl, want_type, flags, 0, NULL, sc); if (length == 0) return (NULL); if (flags & ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA) separator = ','; else separator = '\n'; /* Now, allocate the string and actually populate it. */ p = s = (char *)malloc(length * sizeof(char)); if (p == NULL) { if (errno == ENOMEM) __archive_errx(1, "No memory"); return (NULL); } count = 0; if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_USER_OBJ, flags, NULL, acl->mode & 0700, -1); *p++ = separator; append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_GROUP_OBJ, flags, NULL, acl->mode & 0070, -1); *p++ = separator; append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_OTHER, flags, NULL, acl->mode & 0007, -1); count += 3; } for (ap = acl->acl_head; ap != NULL; ap = ap->next) { if ((ap->type & want_type) == 0) continue; /* * Filemode-mapping ACL entries are stored exclusively in * ap->mode so they should not be in the list */ if ((ap->type == ARCHIVE_ENTRY_ACL_TYPE_ACCESS) && (ap->tag == ARCHIVE_ENTRY_ACL_USER_OBJ || ap->tag == ARCHIVE_ENTRY_ACL_GROUP_OBJ || ap->tag == ARCHIVE_ENTRY_ACL_OTHER)) continue; if (ap->type == ARCHIVE_ENTRY_ACL_TYPE_DEFAULT && (flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) != 0) prefix = "default:"; else prefix = NULL; r = archive_mstring_get_mbs_l( &ap->name, &name, &len, sc); if (r != 0) { free(s); return (NULL); } if (count > 0) *p++ = separator; if (name == NULL || (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID)) { id = ap->id; } else { id = -1; } append_entry(&p, prefix, ap->type, ap->tag, flags, name, ap->permset, id); count++; } /* Add terminating character */ *p++ = '\0'; len = strlen(s); if ((ssize_t)len > (length - 1)) __archive_errx(1, "Buffer overrun"); if (text_len != NULL) *text_len = len; return (s); } static void append_id(char **p, int id) { if (id < 0) id = 0; if (id > 9) append_id(p, id / 10); *(*p)++ = "0123456789"[id % 10]; } static void append_entry(char **p, const char *prefix, int type, int tag, int flags, const char *name, int perm, int id) { int i; if (prefix != NULL) { strcpy(*p, prefix); *p += strlen(*p); } switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: name = NULL; id = -1; if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { strcpy(*p, "owner@"); break; } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_USER: strcpy(*p, "user"); break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: name = NULL; id = -1; if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { strcpy(*p, "group@"); break; } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_GROUP: strcpy(*p, "group"); break; case ARCHIVE_ENTRY_ACL_MASK: strcpy(*p, "mask"); name = NULL; id = -1; break; case ARCHIVE_ENTRY_ACL_OTHER: strcpy(*p, "other"); name = NULL; id = -1; break; case ARCHIVE_ENTRY_ACL_EVERYONE: strcpy(*p, "everyone@"); name = NULL; id = -1; break; } *p += strlen(*p); *(*p)++ = ':'; if (((type & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) != 0) || tag == ARCHIVE_ENTRY_ACL_USER || tag == ARCHIVE_ENTRY_ACL_GROUP) { if (name != NULL) { strcpy(*p, name); *p += strlen(*p); } else if (tag == ARCHIVE_ENTRY_ACL_USER || tag == ARCHIVE_ENTRY_ACL_GROUP) { append_id(p, id); if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0) id = -1; } /* Solaris style has no second colon after other and mask */ if (((flags & ARCHIVE_ENTRY_ACL_STYLE_SOLARIS) == 0) || (tag != ARCHIVE_ENTRY_ACL_OTHER && tag != ARCHIVE_ENTRY_ACL_MASK)) *(*p)++ = ':'; } if ((type & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) != 0) { /* POSIX.1e ACL perms */ *(*p)++ = (perm & 0444) ? 'r' : '-'; *(*p)++ = (perm & 0222) ? 'w' : '-'; *(*p)++ = (perm & 0111) ? 'x' : '-'; } else { /* NFSv4 ACL perms */ for (i = 0; i < nfsv4_acl_perm_map_size; i++) { if (perm & nfsv4_acl_perm_map[i].perm) *(*p)++ = nfsv4_acl_perm_map[i].c; else if ((flags & ARCHIVE_ENTRY_ACL_STYLE_COMPACT) == 0) *(*p)++ = '-'; } *(*p)++ = ':'; for (i = 0; i < nfsv4_acl_flag_map_size; i++) { if (perm & nfsv4_acl_flag_map[i].perm) *(*p)++ = nfsv4_acl_flag_map[i].c; else if ((flags & ARCHIVE_ENTRY_ACL_STYLE_COMPACT) == 0) *(*p)++ = '-'; } *(*p)++ = ':'; switch (type) { case ARCHIVE_ENTRY_ACL_TYPE_ALLOW: strcpy(*p, "allow"); break; case ARCHIVE_ENTRY_ACL_TYPE_DENY: strcpy(*p, "deny"); break; case ARCHIVE_ENTRY_ACL_TYPE_AUDIT: strcpy(*p, "audit"); break; case ARCHIVE_ENTRY_ACL_TYPE_ALARM: strcpy(*p, "alarm"); break; } *p += strlen(*p); } if (id != -1) { *(*p)++ = ':'; append_id(p, id); } } /* * Parse a wide ACL text string. * * The want_type argument may be one of the following: * ARCHIVE_ENTRY_ACL_TYPE_ACCESS - text is a POSIX.1e ACL of type ACCESS * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT - text is a POSIX.1e ACL of type DEFAULT * ARCHIVE_ENTRY_ACL_TYPE_NFS4 - text is as a NFSv4 ACL * * POSIX.1e ACL entries prefixed with "default:" are treated as * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT unless type is ARCHIVE_ENTRY_ACL_TYPE_NFS4 */ int archive_acl_from_text_w(struct archive_acl *acl, const wchar_t *text, int want_type) { struct { const wchar_t *start; const wchar_t *end; } field[6], name; const wchar_t *s, *st; int numfields, fields, n, r, sol, ret; int type, types, tag, permset, id; size_t len; wchar_t sep; ret = ARCHIVE_OK; types = 0; switch (want_type) { case ARCHIVE_ENTRY_ACL_TYPE_POSIX1E: want_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; __LA_FALLTHROUGH; case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: numfields = 5; break; case ARCHIVE_ENTRY_ACL_TYPE_NFS4: numfields = 6; break; default: return (ARCHIVE_FATAL); } while (text != NULL && *text != L'\0') { /* * Parse the fields out of the next entry, * advance 'text' to start of next entry. */ fields = 0; do { const wchar_t *start, *end; next_field_w(&text, &start, &end, &sep); if (fields < numfields) { field[fields].start = start; field[fields].end = end; } ++fields; } while (sep == L':'); /* Set remaining fields to blank. */ for (n = fields; n < numfields; ++n) field[n].start = field[n].end = NULL; if (field[0].start != NULL && *(field[0].start) == L'#') { /* Comment, skip entry */ continue; } n = 0; sol = 0; id = -1; permset = 0; name.start = name.end = NULL; if (want_type != ARCHIVE_ENTRY_ACL_TYPE_NFS4) { /* POSIX.1e ACLs */ /* * Default keyword "default:user::rwx" * if found, we have one more field * * We also support old Solaris extension: * "defaultuser::rwx" is the default ACL corresponding * to "user::rwx", etc. valid only for first field */ s = field[0].start; len = field[0].end - field[0].start; if (*s == L'd' && (len == 1 || (len >= 7 && wmemcmp((s + 1), L"efault", 6) == 0))) { type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; if (len > 7) field[0].start += 7; else n = 1; } else type = want_type; /* Check for a numeric ID in field n+1 or n+3. */ isint_w(field[n + 1].start, field[n + 1].end, &id); /* Field n+3 is optional. */ if (id == -1 && fields > n+3) isint_w(field[n + 3].start, field[n + 3].end, &id); tag = 0; s = field[n].start; st = field[n].start + 1; len = field[n].end - field[n].start; switch (*s) { case L'u': if (len == 1 || (len == 4 && wmemcmp(st, L"ser", 3) == 0)) tag = ARCHIVE_ENTRY_ACL_USER_OBJ; break; case L'g': if (len == 1 || (len == 5 && wmemcmp(st, L"roup", 4) == 0)) tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; break; case L'o': if (len == 1 || (len == 5 && wmemcmp(st, L"ther", 4) == 0)) tag = ARCHIVE_ENTRY_ACL_OTHER; break; case L'm': if (len == 1 || (len == 4 && wmemcmp(st, L"ask", 3) == 0)) tag = ARCHIVE_ENTRY_ACL_MASK; break; default: break; } switch (tag) { case ARCHIVE_ENTRY_ACL_OTHER: case ARCHIVE_ENTRY_ACL_MASK: if (fields == (n + 2) && field[n + 1].start < field[n + 1].end && ismode_w(field[n + 1].start, field[n + 1].end, &permset)) { /* This is Solaris-style "other:rwx" */ sol = 1; } else if (fields == (n + 3) && field[n + 1].start < field[n + 1].end) { /* Invalid mask or other field */ ret = ARCHIVE_WARN; continue; } break; case ARCHIVE_ENTRY_ACL_USER_OBJ: case ARCHIVE_ENTRY_ACL_GROUP_OBJ: if (id != -1 || field[n + 1].start < field[n + 1].end) { name = field[n + 1]; if (tag == ARCHIVE_ENTRY_ACL_USER_OBJ) tag = ARCHIVE_ENTRY_ACL_USER; else tag = ARCHIVE_ENTRY_ACL_GROUP; } break; default: /* Invalid tag, skip entry */ ret = ARCHIVE_WARN; continue; } /* * Without "default:" we expect mode in field 2 * Exception: Solaris other and mask fields */ if (permset == 0 && !ismode_w(field[n + 2 - sol].start, field[n + 2 - sol].end, &permset)) { /* Invalid mode, skip entry */ ret = ARCHIVE_WARN; continue; } } else { /* NFS4 ACLs */ s = field[0].start; len = field[0].end - field[0].start; tag = 0; switch (len) { case 4: if (wmemcmp(s, L"user", 4) == 0) tag = ARCHIVE_ENTRY_ACL_USER; break; case 5: if (wmemcmp(s, L"group", 5) == 0) tag = ARCHIVE_ENTRY_ACL_GROUP; break; case 6: if (wmemcmp(s, L"owner@", 6) == 0) tag = ARCHIVE_ENTRY_ACL_USER_OBJ; else if (wmemcmp(s, L"group@", len) == 0) tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; break; case 9: if (wmemcmp(s, L"everyone@", 9) == 0) tag = ARCHIVE_ENTRY_ACL_EVERYONE; default: break; } if (tag == 0) { /* Invalid tag, skip entry */ ret = ARCHIVE_WARN; continue; } else if (tag == ARCHIVE_ENTRY_ACL_USER || tag == ARCHIVE_ENTRY_ACL_GROUP) { n = 1; name = field[1]; isint_w(name.start, name.end, &id); } else n = 0; if (!is_nfs4_perms_w(field[1 + n].start, field[1 + n].end, &permset)) { /* Invalid NFSv4 perms, skip entry */ ret = ARCHIVE_WARN; continue; } if (!is_nfs4_flags_w(field[2 + n].start, field[2 + n].end, &permset)) { /* Invalid NFSv4 flags, skip entry */ ret = ARCHIVE_WARN; continue; } s = field[3 + n].start; len = field[3 + n].end - field[3 + n].start; type = 0; if (len == 4) { if (wmemcmp(s, L"deny", 4) == 0) type = ARCHIVE_ENTRY_ACL_TYPE_DENY; } else if (len == 5) { if (wmemcmp(s, L"allow", 5) == 0) type = ARCHIVE_ENTRY_ACL_TYPE_ALLOW; else if (wmemcmp(s, L"audit", 5) == 0) type = ARCHIVE_ENTRY_ACL_TYPE_AUDIT; else if (wmemcmp(s, L"alarm", 5) == 0) type = ARCHIVE_ENTRY_ACL_TYPE_ALARM; } if (type == 0) { /* Invalid entry type, skip entry */ ret = ARCHIVE_WARN; continue; } isint_w(field[4 + n].start, field[4 + n].end, &id); } /* Add entry to the internal list. */ r = archive_acl_add_entry_w_len(acl, type, permset, tag, id, name.start, name.end - name.start); if (r < ARCHIVE_WARN) return (r); if (r != ARCHIVE_OK) ret = ARCHIVE_WARN; types |= type; } /* Reset ACL */ archive_acl_reset(acl, types); return (ret); } /* * Parse a string to a positive decimal integer. Returns true if * the string is non-empty and consists only of decimal digits, * false otherwise. */ static int isint_w(const wchar_t *start, const wchar_t *end, int *result) { int n = 0; if (start >= end) return (0); while (start < end) { if (*start < '0' || *start > '9') return (0); if (n > (INT_MAX / 10) || (n == INT_MAX / 10 && (*start - '0') > INT_MAX % 10)) { n = INT_MAX; } else { n *= 10; n += *start - '0'; } start++; } *result = n; return (1); } /* * Parse a string as a mode field. Returns true if * the string is non-empty and consists only of mode characters, * false otherwise. */ static int ismode_w(const wchar_t *start, const wchar_t *end, int *permset) { const wchar_t *p; if (start >= end) return (0); p = start; *permset = 0; while (p < end) { switch (*p++) { case L'r': case L'R': *permset |= ARCHIVE_ENTRY_ACL_READ; break; case L'w': case L'W': *permset |= ARCHIVE_ENTRY_ACL_WRITE; break; case L'x': case L'X': *permset |= ARCHIVE_ENTRY_ACL_EXECUTE; break; case L'-': break; default: return (0); } } return (1); } /* * Parse a string as a NFS4 ACL permission field. * Returns true if the string is non-empty and consists only of NFS4 ACL * permission characters, false otherwise */ static int is_nfs4_perms_w(const wchar_t *start, const wchar_t *end, int *permset) { const wchar_t *p = start; while (p < end) { switch (*p++) { case L'r': *permset |= ARCHIVE_ENTRY_ACL_READ_DATA; break; case L'w': *permset |= ARCHIVE_ENTRY_ACL_WRITE_DATA; break; case L'x': *permset |= ARCHIVE_ENTRY_ACL_EXECUTE; break; case L'p': *permset |= ARCHIVE_ENTRY_ACL_APPEND_DATA; break; case L'D': *permset |= ARCHIVE_ENTRY_ACL_DELETE_CHILD; break; case L'd': *permset |= ARCHIVE_ENTRY_ACL_DELETE; break; case L'a': *permset |= ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES; break; case L'A': *permset |= ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES; break; case L'R': *permset |= ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS; break; case L'W': *permset |= ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS; break; case L'c': *permset |= ARCHIVE_ENTRY_ACL_READ_ACL; break; case L'C': *permset |= ARCHIVE_ENTRY_ACL_WRITE_ACL; break; case L'o': *permset |= ARCHIVE_ENTRY_ACL_WRITE_OWNER; break; case L's': *permset |= ARCHIVE_ENTRY_ACL_SYNCHRONIZE; break; case L'-': break; default: return(0); } } return (1); } /* * Parse a string as a NFS4 ACL flags field. * Returns true if the string is non-empty and consists only of NFS4 ACL * flag characters, false otherwise */ static int is_nfs4_flags_w(const wchar_t *start, const wchar_t *end, int *permset) { const wchar_t *p = start; while (p < end) { switch(*p++) { case L'f': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT; break; case L'd': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT; break; case L'i': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY; break; case L'n': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT; break; case L'S': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS; break; case L'F': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS; break; case L'I': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_INHERITED; break; case L'-': break; default: return (0); } } return (1); } /* * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated * to point to just after the separator. *start points to the first * character of the matched text and *end just after the last * character of the matched identifier. In particular *end - *start * is the length of the field body, not including leading or trailing * whitespace. */ static void next_field_w(const wchar_t **wp, const wchar_t **start, const wchar_t **end, wchar_t *sep) { /* Skip leading whitespace to find start of field. */ while (**wp == L' ' || **wp == L'\t' || **wp == L'\n') { (*wp)++; } *start = *wp; /* Scan for the separator. */ while (**wp != L'\0' && **wp != L',' && **wp != L':' && **wp != L'\n' && **wp != L'#') { (*wp)++; } *sep = **wp; /* Locate end of field, trim trailing whitespace if necessary */ if (*wp == *start) { *end = *wp; } else { *end = *wp - 1; while (**end == L' ' || **end == L'\t' || **end == L'\n') { (*end)--; } (*end)++; } /* Handle in-field comments */ if (*sep == L'#') { while (**wp != L'\0' && **wp != L',' && **wp != L'\n') { (*wp)++; } *sep = **wp; } /* Adjust scanner location. */ if (**wp != L'\0') (*wp)++; } /* * Parse an ACL text string. * * The want_type argument may be one of the following: * ARCHIVE_ENTRY_ACL_TYPE_ACCESS - text is a POSIX.1e ACL of type ACCESS * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT - text is a POSIX.1e ACL of type DEFAULT * ARCHIVE_ENTRY_ACL_TYPE_NFS4 - text is as a NFSv4 ACL * * POSIX.1e ACL entries prefixed with "default:" are treated as * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT unless type is ARCHIVE_ENTRY_ACL_TYPE_NFS4 */ int archive_acl_from_text_l(struct archive_acl *acl, const char *text, int want_type, struct archive_string_conv *sc) { struct { const char *start; const char *end; } field[6], name; const char *s, *st; int numfields, fields, n, r, sol, ret; int type, types, tag, permset, id; size_t len; char sep; switch (want_type) { case ARCHIVE_ENTRY_ACL_TYPE_POSIX1E: want_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; __LA_FALLTHROUGH; case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: numfields = 5; break; case ARCHIVE_ENTRY_ACL_TYPE_NFS4: numfields = 6; break; default: return (ARCHIVE_FATAL); } ret = ARCHIVE_OK; types = 0; while (text != NULL && *text != '\0') { /* * Parse the fields out of the next entry, * advance 'text' to start of next entry. */ fields = 0; do { const char *start, *end; next_field(&text, &start, &end, &sep); if (fields < numfields) { field[fields].start = start; field[fields].end = end; } ++fields; } while (sep == ':'); /* Set remaining fields to blank. */ for (n = fields; n < numfields; ++n) field[n].start = field[n].end = NULL; if (field[0].start != NULL && *(field[0].start) == '#') { /* Comment, skip entry */ continue; } n = 0; sol = 0; id = -1; permset = 0; name.start = name.end = NULL; if (want_type != ARCHIVE_ENTRY_ACL_TYPE_NFS4) { /* POSIX.1e ACLs */ /* * Default keyword "default:user::rwx" * if found, we have one more field * * We also support old Solaris extension: * "defaultuser::rwx" is the default ACL corresponding * to "user::rwx", etc. valid only for first field */ s = field[0].start; len = field[0].end - field[0].start; if (*s == 'd' && (len == 1 || (len >= 7 && memcmp((s + 1), "efault", 6) == 0))) { type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; if (len > 7) field[0].start += 7; else n = 1; } else type = want_type; /* Check for a numeric ID in field n+1 or n+3. */ isint(field[n + 1].start, field[n + 1].end, &id); /* Field n+3 is optional. */ if (id == -1 && fields > (n + 3)) isint(field[n + 3].start, field[n + 3].end, &id); tag = 0; s = field[n].start; st = field[n].start + 1; len = field[n].end - field[n].start; + if (len == 0) { + ret = ARCHIVE_WARN; + continue; + } + switch (*s) { case 'u': if (len == 1 || (len == 4 && memcmp(st, "ser", 3) == 0)) tag = ARCHIVE_ENTRY_ACL_USER_OBJ; break; case 'g': if (len == 1 || (len == 5 && memcmp(st, "roup", 4) == 0)) tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; break; case 'o': if (len == 1 || (len == 5 && memcmp(st, "ther", 4) == 0)) tag = ARCHIVE_ENTRY_ACL_OTHER; break; case 'm': if (len == 1 || (len == 4 && memcmp(st, "ask", 3) == 0)) tag = ARCHIVE_ENTRY_ACL_MASK; break; default: break; } switch (tag) { case ARCHIVE_ENTRY_ACL_OTHER: case ARCHIVE_ENTRY_ACL_MASK: if (fields == (n + 2) && field[n + 1].start < field[n + 1].end && ismode(field[n + 1].start, field[n + 1].end, &permset)) { /* This is Solaris-style "other:rwx" */ sol = 1; } else if (fields == (n + 3) && field[n + 1].start < field[n + 1].end) { /* Invalid mask or other field */ ret = ARCHIVE_WARN; continue; } break; case ARCHIVE_ENTRY_ACL_USER_OBJ: case ARCHIVE_ENTRY_ACL_GROUP_OBJ: if (id != -1 || field[n + 1].start < field[n + 1].end) { name = field[n + 1]; if (tag == ARCHIVE_ENTRY_ACL_USER_OBJ) tag = ARCHIVE_ENTRY_ACL_USER; else tag = ARCHIVE_ENTRY_ACL_GROUP; } break; default: /* Invalid tag, skip entry */ ret = ARCHIVE_WARN; continue; } /* * Without "default:" we expect mode in field 3 * Exception: Solaris other and mask fields */ if (permset == 0 && !ismode(field[n + 2 - sol].start, field[n + 2 - sol].end, &permset)) { /* Invalid mode, skip entry */ ret = ARCHIVE_WARN; continue; } } else { /* NFS4 ACLs */ s = field[0].start; len = field[0].end - field[0].start; tag = 0; switch (len) { case 4: if (memcmp(s, "user", 4) == 0) tag = ARCHIVE_ENTRY_ACL_USER; break; case 5: if (memcmp(s, "group", 5) == 0) tag = ARCHIVE_ENTRY_ACL_GROUP; break; case 6: if (memcmp(s, "owner@", 6) == 0) tag = ARCHIVE_ENTRY_ACL_USER_OBJ; else if (memcmp(s, "group@", 6) == 0) tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; break; case 9: if (memcmp(s, "everyone@", 9) == 0) tag = ARCHIVE_ENTRY_ACL_EVERYONE; break; default: break; } if (tag == 0) { /* Invalid tag, skip entry */ ret = ARCHIVE_WARN; continue; } else if (tag == ARCHIVE_ENTRY_ACL_USER || tag == ARCHIVE_ENTRY_ACL_GROUP) { n = 1; name = field[1]; isint(name.start, name.end, &id); } else n = 0; if (!is_nfs4_perms(field[1 + n].start, field[1 + n].end, &permset)) { /* Invalid NFSv4 perms, skip entry */ ret = ARCHIVE_WARN; continue; } if (!is_nfs4_flags(field[2 + n].start, field[2 + n].end, &permset)) { /* Invalid NFSv4 flags, skip entry */ ret = ARCHIVE_WARN; continue; } s = field[3 + n].start; len = field[3 + n].end - field[3 + n].start; type = 0; if (len == 4) { if (memcmp(s, "deny", 4) == 0) type = ARCHIVE_ENTRY_ACL_TYPE_DENY; } else if (len == 5) { if (memcmp(s, "allow", 5) == 0) type = ARCHIVE_ENTRY_ACL_TYPE_ALLOW; else if (memcmp(s, "audit", 5) == 0) type = ARCHIVE_ENTRY_ACL_TYPE_AUDIT; else if (memcmp(s, "alarm", 5) == 0) type = ARCHIVE_ENTRY_ACL_TYPE_ALARM; } if (type == 0) { /* Invalid entry type, skip entry */ ret = ARCHIVE_WARN; continue; } isint(field[4 + n].start, field[4 + n].end, &id); } /* Add entry to the internal list. */ r = archive_acl_add_entry_len_l(acl, type, permset, tag, id, name.start, name.end - name.start, sc); if (r < ARCHIVE_WARN) return (r); if (r != ARCHIVE_OK) ret = ARCHIVE_WARN; types |= type; } /* Reset ACL */ archive_acl_reset(acl, types); return (ret); } /* * Parse a string to a positive decimal integer. Returns true if * the string is non-empty and consists only of decimal digits, * false otherwise. */ static int isint(const char *start, const char *end, int *result) { int n = 0; if (start >= end) return (0); while (start < end) { if (*start < '0' || *start > '9') return (0); if (n > (INT_MAX / 10) || (n == INT_MAX / 10 && (*start - '0') > INT_MAX % 10)) { n = INT_MAX; } else { n *= 10; n += *start - '0'; } start++; } *result = n; return (1); } /* * Parse a string as a mode field. Returns true if * the string is non-empty and consists only of mode characters, * false otherwise. */ static int ismode(const char *start, const char *end, int *permset) { const char *p; if (start >= end) return (0); p = start; *permset = 0; while (p < end) { switch (*p++) { case 'r': case 'R': *permset |= ARCHIVE_ENTRY_ACL_READ; break; case 'w': case 'W': *permset |= ARCHIVE_ENTRY_ACL_WRITE; break; case 'x': case 'X': *permset |= ARCHIVE_ENTRY_ACL_EXECUTE; break; case '-': break; default: return (0); } } return (1); } /* * Parse a string as a NFS4 ACL permission field. * Returns true if the string is non-empty and consists only of NFS4 ACL * permission characters, false otherwise */ static int is_nfs4_perms(const char *start, const char *end, int *permset) { const char *p = start; while (p < end) { switch (*p++) { case 'r': *permset |= ARCHIVE_ENTRY_ACL_READ_DATA; break; case 'w': *permset |= ARCHIVE_ENTRY_ACL_WRITE_DATA; break; case 'x': *permset |= ARCHIVE_ENTRY_ACL_EXECUTE; break; case 'p': *permset |= ARCHIVE_ENTRY_ACL_APPEND_DATA; break; case 'D': *permset |= ARCHIVE_ENTRY_ACL_DELETE_CHILD; break; case 'd': *permset |= ARCHIVE_ENTRY_ACL_DELETE; break; case 'a': *permset |= ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES; break; case 'A': *permset |= ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES; break; case 'R': *permset |= ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS; break; case 'W': *permset |= ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS; break; case 'c': *permset |= ARCHIVE_ENTRY_ACL_READ_ACL; break; case 'C': *permset |= ARCHIVE_ENTRY_ACL_WRITE_ACL; break; case 'o': *permset |= ARCHIVE_ENTRY_ACL_WRITE_OWNER; break; case 's': *permset |= ARCHIVE_ENTRY_ACL_SYNCHRONIZE; break; case '-': break; default: return(0); } } return (1); } /* * Parse a string as a NFS4 ACL flags field. * Returns true if the string is non-empty and consists only of NFS4 ACL * flag characters, false otherwise */ static int is_nfs4_flags(const char *start, const char *end, int *permset) { const char *p = start; while (p < end) { switch(*p++) { case 'f': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT; break; case 'd': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT; break; case 'i': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY; break; case 'n': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT; break; case 'S': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS; break; case 'F': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS; break; case 'I': *permset |= ARCHIVE_ENTRY_ACL_ENTRY_INHERITED; break; case '-': break; default: return (0); } } return (1); } /* * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated * to point to just after the separator. *start points to the first * character of the matched text and *end just after the last * character of the matched identifier. In particular *end - *start * is the length of the field body, not including leading or trailing * whitespace. */ static void next_field(const char **p, const char **start, const char **end, char *sep) { /* Skip leading whitespace to find start of field. */ while (**p == ' ' || **p == '\t' || **p == '\n') { (*p)++; } *start = *p; /* Scan for the separator. */ while (**p != '\0' && **p != ',' && **p != ':' && **p != '\n' && **p != '#') { (*p)++; } *sep = **p; /* Locate end of field, trim trailing whitespace if necessary */ if (*p == *start) { *end = *p; } else { *end = *p - 1; while (**end == ' ' || **end == '\t' || **end == '\n') { (*end)--; } (*end)++; } /* Handle in-field comments */ if (*sep == '#') { while (**p != '\0' && **p != ',' && **p != '\n') { (*p)++; } *sep = **p; } /* Adjust scanner location. */ if (**p != '\0') (*p)++; } Index: head/contrib/libarchive/libarchive/archive_read_support_format_rar.c =================================================================== --- head/contrib/libarchive/libarchive/archive_read_support_format_rar.c (revision 342041) +++ head/contrib/libarchive/libarchive/archive_read_support_format_rar.c (revision 342042) @@ -1,2943 +1,2957 @@ /*- * Copyright (c) 2003-2007 Tim Kientzle * Copyright (c) 2011 Andres Mejia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" #ifdef HAVE_ERRNO_H #include #endif #include #include #ifdef HAVE_ZLIB_H #include /* crc32 */ #endif #include "archive.h" #ifndef HAVE_ZLIB_H #include "archive_crc32.h" #endif #include "archive_endian.h" #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_ppmd7_private.h" #include "archive_private.h" #include "archive_read_private.h" /* RAR signature, also known as the mark header */ #define RAR_SIGNATURE "\x52\x61\x72\x21\x1A\x07\x00" /* Header types */ #define MARK_HEAD 0x72 #define MAIN_HEAD 0x73 #define FILE_HEAD 0x74 #define COMM_HEAD 0x75 #define AV_HEAD 0x76 #define SUB_HEAD 0x77 #define PROTECT_HEAD 0x78 #define SIGN_HEAD 0x79 #define NEWSUB_HEAD 0x7a #define ENDARC_HEAD 0x7b /* Main Header Flags */ #define MHD_VOLUME 0x0001 #define MHD_COMMENT 0x0002 #define MHD_LOCK 0x0004 #define MHD_SOLID 0x0008 #define MHD_NEWNUMBERING 0x0010 #define MHD_AV 0x0020 #define MHD_PROTECT 0x0040 #define MHD_PASSWORD 0x0080 #define MHD_FIRSTVOLUME 0x0100 #define MHD_ENCRYPTVER 0x0200 /* Flags common to all headers */ #define HD_MARKDELETION 0x4000 #define HD_ADD_SIZE_PRESENT 0x8000 /* File Header Flags */ #define FHD_SPLIT_BEFORE 0x0001 #define FHD_SPLIT_AFTER 0x0002 #define FHD_PASSWORD 0x0004 #define FHD_COMMENT 0x0008 #define FHD_SOLID 0x0010 #define FHD_LARGE 0x0100 #define FHD_UNICODE 0x0200 #define FHD_SALT 0x0400 #define FHD_VERSION 0x0800 #define FHD_EXTTIME 0x1000 #define FHD_EXTFLAGS 0x2000 /* File dictionary sizes */ #define DICTIONARY_SIZE_64 0x00 #define DICTIONARY_SIZE_128 0x20 #define DICTIONARY_SIZE_256 0x40 #define DICTIONARY_SIZE_512 0x60 #define DICTIONARY_SIZE_1024 0x80 #define DICTIONARY_SIZE_2048 0xA0 #define DICTIONARY_SIZE_4096 0xC0 #define FILE_IS_DIRECTORY 0xE0 #define DICTIONARY_MASK FILE_IS_DIRECTORY /* OS Flags */ #define OS_MSDOS 0 #define OS_OS2 1 #define OS_WIN32 2 #define OS_UNIX 3 #define OS_MAC_OS 4 #define OS_BEOS 5 /* Compression Methods */ #define COMPRESS_METHOD_STORE 0x30 /* LZSS */ #define COMPRESS_METHOD_FASTEST 0x31 #define COMPRESS_METHOD_FAST 0x32 #define COMPRESS_METHOD_NORMAL 0x33 /* PPMd Variant H */ #define COMPRESS_METHOD_GOOD 0x34 #define COMPRESS_METHOD_BEST 0x35 #define CRC_POLYNOMIAL 0xEDB88320 #define NS_UNIT 10000000 #define DICTIONARY_MAX_SIZE 0x400000 #define MAINCODE_SIZE 299 #define OFFSETCODE_SIZE 60 #define LOWOFFSETCODE_SIZE 17 #define LENGTHCODE_SIZE 28 #define HUFFMAN_TABLE_SIZE \ MAINCODE_SIZE + OFFSETCODE_SIZE + LOWOFFSETCODE_SIZE + LENGTHCODE_SIZE #define MAX_SYMBOL_LENGTH 0xF #define MAX_SYMBOLS 20 /* * Considering L1,L2 cache miss and a calling of write system-call, * the best size of the output buffer(uncompressed buffer) is 128K. * If the structure of extracting process is changed, this value * might be researched again. */ #define UNP_BUFFER_SIZE (128 * 1024) /* Define this here for non-Windows platforms */ #if !((defined(__WIN32__) || defined(_WIN32) || defined(__WIN32)) && !defined(__CYGWIN__)) #define FILE_ATTRIBUTE_DIRECTORY 0x10 #endif /* Fields common to all headers */ struct rar_header { char crc[2]; char type; char flags[2]; char size[2]; }; /* Fields common to all file headers */ struct rar_file_header { char pack_size[4]; char unp_size[4]; char host_os; char file_crc[4]; char file_time[4]; char unp_ver; char method; char name_size[2]; char file_attr[4]; }; struct huffman_tree_node { int branches[2]; }; struct huffman_table_entry { unsigned int length; int value; }; struct huffman_code { struct huffman_tree_node *tree; int numentries; int numallocatedentries; int minlength; int maxlength; int tablesize; struct huffman_table_entry *table; }; struct lzss { unsigned char *window; int mask; int64_t position; }; struct data_block_offsets { int64_t header_size; int64_t start_offset; int64_t end_offset; }; struct rar { /* Entries from main RAR header */ unsigned main_flags; unsigned long file_crc; char reserved1[2]; char reserved2[4]; char encryptver; /* File header entries */ char compression_method; unsigned file_flags; int64_t packed_size; int64_t unp_size; time_t mtime; long mnsec; mode_t mode; char *filename; char *filename_save; size_t filename_save_size; size_t filename_allocated; /* File header optional entries */ char salt[8]; time_t atime; long ansec; time_t ctime; long cnsec; time_t arctime; long arcnsec; /* Fields to help with tracking decompression of files. */ int64_t bytes_unconsumed; int64_t bytes_remaining; int64_t bytes_uncopied; int64_t offset; int64_t offset_outgoing; int64_t offset_seek; char valid; unsigned int unp_offset; unsigned int unp_buffer_size; unsigned char *unp_buffer; unsigned int dictionary_size; char start_new_block; char entry_eof; unsigned long crc_calculated; int found_first_header; char has_endarc_header; struct data_block_offsets *dbo; unsigned int cursor; unsigned int nodes; + char filename_must_match; /* LZSS members */ struct huffman_code maincode; struct huffman_code offsetcode; struct huffman_code lowoffsetcode; struct huffman_code lengthcode; unsigned char lengthtable[HUFFMAN_TABLE_SIZE]; struct lzss lzss; char output_last_match; unsigned int lastlength; unsigned int lastoffset; unsigned int oldoffset[4]; unsigned int lastlowoffset; unsigned int numlowoffsetrepeats; int64_t filterstart; char start_new_table; /* PPMd Variant H members */ char ppmd_valid; char ppmd_eod; char is_ppmd_block; int ppmd_escape; CPpmd7 ppmd7_context; CPpmd7z_RangeDec range_dec; IByteIn bytein; /* * String conversion object. */ int init_default_conversion; struct archive_string_conv *sconv_default; struct archive_string_conv *opt_sconv; struct archive_string_conv *sconv_utf8; struct archive_string_conv *sconv_utf16be; /* * Bit stream reader. */ struct rar_br { #define CACHE_TYPE uint64_t #define CACHE_BITS (8 * sizeof(CACHE_TYPE)) /* Cache buffer. */ CACHE_TYPE cache_buffer; /* Indicates how many bits avail in cache_buffer. */ int cache_avail; ssize_t avail_in; const unsigned char *next_in; } br; /* * Custom field to denote that this archive contains encrypted entries */ int has_encrypted_entries; }; static int archive_read_support_format_rar_capabilities(struct archive_read *); static int archive_read_format_rar_has_encrypted_entries(struct archive_read *); static int archive_read_format_rar_bid(struct archive_read *, int); static int archive_read_format_rar_options(struct archive_read *, const char *, const char *); static int archive_read_format_rar_read_header(struct archive_read *, struct archive_entry *); static int archive_read_format_rar_read_data(struct archive_read *, const void **, size_t *, int64_t *); static int archive_read_format_rar_read_data_skip(struct archive_read *a); static int64_t archive_read_format_rar_seek_data(struct archive_read *, int64_t, int); static int archive_read_format_rar_cleanup(struct archive_read *); /* Support functions */ static int read_header(struct archive_read *, struct archive_entry *, char); static time_t get_time(int); static int read_exttime(const char *, struct rar *, const char *); static int read_symlink_stored(struct archive_read *, struct archive_entry *, struct archive_string_conv *); static int read_data_stored(struct archive_read *, const void **, size_t *, int64_t *); static int read_data_compressed(struct archive_read *, const void **, size_t *, int64_t *); static int rar_br_preparation(struct archive_read *, struct rar_br *); static int parse_codes(struct archive_read *); static void free_codes(struct archive_read *); static int read_next_symbol(struct archive_read *, struct huffman_code *); static int create_code(struct archive_read *, struct huffman_code *, unsigned char *, int, char); static int add_value(struct archive_read *, struct huffman_code *, int, int, int); static int new_node(struct huffman_code *); static int make_table(struct archive_read *, struct huffman_code *); static int make_table_recurse(struct archive_read *, struct huffman_code *, int, struct huffman_table_entry *, int, int); static int64_t expand(struct archive_read *, int64_t); static int copy_from_lzss_window(struct archive_read *, const void **, int64_t, int); static const void *rar_read_ahead(struct archive_read *, size_t, ssize_t *); /* * Bit stream reader. */ /* Check that the cache buffer has enough bits. */ #define rar_br_has(br, n) ((br)->cache_avail >= n) /* Get compressed data by bit. */ #define rar_br_bits(br, n) \ (((uint32_t)((br)->cache_buffer >> \ ((br)->cache_avail - (n)))) & cache_masks[n]) #define rar_br_bits_forced(br, n) \ (((uint32_t)((br)->cache_buffer << \ ((n) - (br)->cache_avail))) & cache_masks[n]) /* Read ahead to make sure the cache buffer has enough compressed data we * will use. * True : completed, there is enough data in the cache buffer. * False : there is no data in the stream. */ #define rar_br_read_ahead(a, br, n) \ ((rar_br_has(br, (n)) || rar_br_fillup(a, br)) || rar_br_has(br, (n))) /* Notify how many bits we consumed. */ #define rar_br_consume(br, n) ((br)->cache_avail -= (n)) #define rar_br_consume_unalined_bits(br) ((br)->cache_avail &= ~7) static const uint32_t cache_masks[] = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; /* * Shift away used bits in the cache data and fill it up with following bits. * Call this when cache buffer does not have enough bits you need. * * Returns 1 if the cache buffer is full. * Returns 0 if the cache buffer is not full; input buffer is empty. */ static int rar_br_fillup(struct archive_read *a, struct rar_br *br) { struct rar *rar = (struct rar *)(a->format->data); int n = CACHE_BITS - br->cache_avail; for (;;) { switch (n >> 3) { case 8: if (br->avail_in >= 8) { br->cache_buffer = ((uint64_t)br->next_in[0]) << 56 | ((uint64_t)br->next_in[1]) << 48 | ((uint64_t)br->next_in[2]) << 40 | ((uint64_t)br->next_in[3]) << 32 | ((uint32_t)br->next_in[4]) << 24 | ((uint32_t)br->next_in[5]) << 16 | ((uint32_t)br->next_in[6]) << 8 | (uint32_t)br->next_in[7]; br->next_in += 8; br->avail_in -= 8; br->cache_avail += 8 * 8; rar->bytes_unconsumed += 8; rar->bytes_remaining -= 8; return (1); } break; case 7: if (br->avail_in >= 7) { br->cache_buffer = (br->cache_buffer << 56) | ((uint64_t)br->next_in[0]) << 48 | ((uint64_t)br->next_in[1]) << 40 | ((uint64_t)br->next_in[2]) << 32 | ((uint32_t)br->next_in[3]) << 24 | ((uint32_t)br->next_in[4]) << 16 | ((uint32_t)br->next_in[5]) << 8 | (uint32_t)br->next_in[6]; br->next_in += 7; br->avail_in -= 7; br->cache_avail += 7 * 8; rar->bytes_unconsumed += 7; rar->bytes_remaining -= 7; return (1); } break; case 6: if (br->avail_in >= 6) { br->cache_buffer = (br->cache_buffer << 48) | ((uint64_t)br->next_in[0]) << 40 | ((uint64_t)br->next_in[1]) << 32 | ((uint32_t)br->next_in[2]) << 24 | ((uint32_t)br->next_in[3]) << 16 | ((uint32_t)br->next_in[4]) << 8 | (uint32_t)br->next_in[5]; br->next_in += 6; br->avail_in -= 6; br->cache_avail += 6 * 8; rar->bytes_unconsumed += 6; rar->bytes_remaining -= 6; return (1); } break; case 0: /* We have enough compressed data in * the cache buffer.*/ return (1); default: break; } if (br->avail_in <= 0) { if (rar->bytes_unconsumed > 0) { /* Consume as much as the decompressor * actually used. */ __archive_read_consume(a, rar->bytes_unconsumed); rar->bytes_unconsumed = 0; } br->next_in = rar_read_ahead(a, 1, &(br->avail_in)); if (br->next_in == NULL) return (0); if (br->avail_in == 0) return (0); } br->cache_buffer = (br->cache_buffer << 8) | *br->next_in++; br->avail_in--; br->cache_avail += 8; n -= 8; rar->bytes_unconsumed++; rar->bytes_remaining--; } } static int rar_br_preparation(struct archive_read *a, struct rar_br *br) { struct rar *rar = (struct rar *)(a->format->data); if (rar->bytes_remaining > 0) { br->next_in = rar_read_ahead(a, 1, &(br->avail_in)); if (br->next_in == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated RAR file data"); return (ARCHIVE_FATAL); } if (br->cache_avail == 0) (void)rar_br_fillup(a, br); } return (ARCHIVE_OK); } /* Find last bit set */ static inline int rar_fls(unsigned int word) { word |= (word >> 1); word |= (word >> 2); word |= (word >> 4); word |= (word >> 8); word |= (word >> 16); return word - (word >> 1); } /* LZSS functions */ static inline int64_t lzss_position(struct lzss *lzss) { return lzss->position; } static inline int lzss_mask(struct lzss *lzss) { return lzss->mask; } static inline int lzss_size(struct lzss *lzss) { return lzss->mask + 1; } static inline int lzss_offset_for_position(struct lzss *lzss, int64_t pos) { return (int)(pos & lzss->mask); } static inline unsigned char * lzss_pointer_for_position(struct lzss *lzss, int64_t pos) { return &lzss->window[lzss_offset_for_position(lzss, pos)]; } static inline int lzss_current_offset(struct lzss *lzss) { return lzss_offset_for_position(lzss, lzss->position); } static inline uint8_t * lzss_current_pointer(struct lzss *lzss) { return lzss_pointer_for_position(lzss, lzss->position); } static inline void lzss_emit_literal(struct rar *rar, uint8_t literal) { *lzss_current_pointer(&rar->lzss) = literal; rar->lzss.position++; } static inline void lzss_emit_match(struct rar *rar, int offset, int length) { int dstoffs = lzss_current_offset(&rar->lzss); int srcoffs = (dstoffs - offset) & lzss_mask(&rar->lzss); int l, li, remaining; unsigned char *d, *s; remaining = length; while (remaining > 0) { l = remaining; if (dstoffs > srcoffs) { if (l > lzss_size(&rar->lzss) - dstoffs) l = lzss_size(&rar->lzss) - dstoffs; } else { if (l > lzss_size(&rar->lzss) - srcoffs) l = lzss_size(&rar->lzss) - srcoffs; } d = &(rar->lzss.window[dstoffs]); s = &(rar->lzss.window[srcoffs]); if ((dstoffs + l < srcoffs) || (srcoffs + l < dstoffs)) memcpy(d, s, l); else { for (li = 0; li < l; li++) d[li] = s[li]; } remaining -= l; dstoffs = (dstoffs + l) & lzss_mask(&(rar->lzss)); srcoffs = (srcoffs + l) & lzss_mask(&(rar->lzss)); } rar->lzss.position += length; } static Byte ppmd_read(void *p) { struct archive_read *a = ((IByteIn*)p)->a; struct rar *rar = (struct rar *)(a->format->data); struct rar_br *br = &(rar->br); Byte b; if (!rar_br_read_ahead(a, br, 8)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated RAR file data"); rar->valid = 0; return 0; } b = rar_br_bits(br, 8); rar_br_consume(br, 8); return b; } int archive_read_support_format_rar(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct rar *rar; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_rar"); rar = (struct rar *)calloc(sizeof(*rar), 1); if (rar == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate rar data"); return (ARCHIVE_FATAL); } /* * Until enough data has been read, we cannot tell about * any encrypted entries yet. */ rar->has_encrypted_entries = ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; r = __archive_read_register_format(a, rar, "rar", archive_read_format_rar_bid, archive_read_format_rar_options, archive_read_format_rar_read_header, archive_read_format_rar_read_data, archive_read_format_rar_read_data_skip, archive_read_format_rar_seek_data, archive_read_format_rar_cleanup, archive_read_support_format_rar_capabilities, archive_read_format_rar_has_encrypted_entries); if (r != ARCHIVE_OK) free(rar); return (r); } static int archive_read_support_format_rar_capabilities(struct archive_read * a) { (void)a; /* UNUSED */ return (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA); } static int archive_read_format_rar_has_encrypted_entries(struct archive_read *_a) { if (_a && _a->format) { struct rar * rar = (struct rar *)_a->format->data; if (rar) { return rar->has_encrypted_entries; } } return ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; } static int archive_read_format_rar_bid(struct archive_read *a, int best_bid) { const char *p; /* If there's already a bid > 30, we'll never win. */ if (best_bid > 30) return (-1); if ((p = __archive_read_ahead(a, 7, NULL)) == NULL) return (-1); if (memcmp(p, RAR_SIGNATURE, 7) == 0) return (30); if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) { /* This is a PE file */ ssize_t offset = 0x10000; ssize_t window = 4096; ssize_t bytes_avail; while (offset + window <= (1024 * 128)) { const char *buff = __archive_read_ahead(a, offset + window, &bytes_avail); if (buff == NULL) { /* Remaining bytes are less than window. */ window >>= 1; if (window < 0x40) return (0); continue; } p = buff + offset; while (p + 7 < buff + bytes_avail) { if (memcmp(p, RAR_SIGNATURE, 7) == 0) return (30); p += 0x10; } offset = p - buff; } } return (0); } static int skip_sfx(struct archive_read *a) { const void *h; const char *p, *q; size_t skip, total; ssize_t bytes, window; total = 0; window = 4096; while (total + window <= (1024 * 128)) { h = __archive_read_ahead(a, window, &bytes); if (h == NULL) { /* Remaining bytes are less than window. */ window >>= 1; if (window < 0x40) goto fatal; continue; } if (bytes < 0x40) goto fatal; p = h; q = p + bytes; /* * Scan ahead until we find something that looks * like the RAR header. */ while (p + 7 < q) { if (memcmp(p, RAR_SIGNATURE, 7) == 0) { skip = p - (const char *)h; __archive_read_consume(a, skip); return (ARCHIVE_OK); } p += 0x10; } skip = p - (const char *)h; __archive_read_consume(a, skip); total += skip; } fatal: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Couldn't find out RAR header"); return (ARCHIVE_FATAL); } static int archive_read_format_rar_options(struct archive_read *a, const char *key, const char *val) { struct rar *rar; int ret = ARCHIVE_FAILED; rar = (struct rar *)(a->format->data); if (strcmp(key, "hdrcharset") == 0) { if (val == NULL || val[0] == 0) archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "rar: hdrcharset option needs a character-set name"); else { rar->opt_sconv = archive_string_conversion_from_charset( &a->archive, val, 0); if (rar->opt_sconv != NULL) ret = ARCHIVE_OK; else ret = ARCHIVE_FATAL; } return (ret); } /* Note: The "warn" return is just to inform the options * supervisor that we didn't handle it. It will generate * a suitable error if no one used this option. */ return (ARCHIVE_WARN); } static int archive_read_format_rar_read_header(struct archive_read *a, struct archive_entry *entry) { const void *h; const char *p; struct rar *rar; size_t skip; char head_type; int ret; unsigned flags; unsigned long crc32_expected; a->archive.archive_format = ARCHIVE_FORMAT_RAR; if (a->archive.archive_format_name == NULL) a->archive.archive_format_name = "RAR"; rar = (struct rar *)(a->format->data); /* * It should be sufficient to call archive_read_next_header() for * a reader to determine if an entry is encrypted or not. If the * encryption of an entry is only detectable when calling * archive_read_data(), so be it. We'll do the same check there * as well. */ if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) { rar->has_encrypted_entries = 0; } /* RAR files can be generated without EOF headers, so return ARCHIVE_EOF if * this fails. */ if ((h = __archive_read_ahead(a, 7, NULL)) == NULL) return (ARCHIVE_EOF); p = h; if (rar->found_first_header == 0 && ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0)) { /* This is an executable ? Must be self-extracting... */ ret = skip_sfx(a); if (ret < ARCHIVE_WARN) return (ret); } rar->found_first_header = 1; while (1) { unsigned long crc32_val; if ((h = __archive_read_ahead(a, 7, NULL)) == NULL) return (ARCHIVE_FATAL); p = h; head_type = p[2]; switch(head_type) { case MARK_HEAD: if (memcmp(p, RAR_SIGNATURE, 7) != 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid marker header"); return (ARCHIVE_FATAL); } __archive_read_consume(a, 7); break; case MAIN_HEAD: rar->main_flags = archive_le16dec(p + 3); skip = archive_le16dec(p + 5); if (skip < 7 + sizeof(rar->reserved1) + sizeof(rar->reserved2)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid header size"); return (ARCHIVE_FATAL); } if ((h = __archive_read_ahead(a, skip, NULL)) == NULL) return (ARCHIVE_FATAL); p = h; memcpy(rar->reserved1, p + 7, sizeof(rar->reserved1)); memcpy(rar->reserved2, p + 7 + sizeof(rar->reserved1), sizeof(rar->reserved2)); if (rar->main_flags & MHD_ENCRYPTVER) { if (skip < 7 + sizeof(rar->reserved1) + sizeof(rar->reserved2)+1) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid header size"); return (ARCHIVE_FATAL); } rar->encryptver = *(p + 7 + sizeof(rar->reserved1) + sizeof(rar->reserved2)); } /* Main header is password encrypted, so we cannot read any file names or any other info about files from the header. */ if (rar->main_flags & MHD_PASSWORD) { archive_entry_set_is_metadata_encrypted(entry, 1); archive_entry_set_is_data_encrypted(entry, 1); rar->has_encrypted_entries = 1; archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "RAR encryption support unavailable."); return (ARCHIVE_FATAL); } crc32_val = crc32(0, (const unsigned char *)p + 2, (unsigned)skip - 2); if ((crc32_val & 0xffff) != archive_le16dec(p)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Header CRC error"); return (ARCHIVE_FATAL); } __archive_read_consume(a, skip); break; case FILE_HEAD: return read_header(a, entry, head_type); case COMM_HEAD: case AV_HEAD: case SUB_HEAD: case PROTECT_HEAD: case SIGN_HEAD: case ENDARC_HEAD: flags = archive_le16dec(p + 3); skip = archive_le16dec(p + 5); if (skip < 7) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid header size too small"); return (ARCHIVE_FATAL); } if (flags & HD_ADD_SIZE_PRESENT) { if (skip < 7 + 4) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid header size too small"); return (ARCHIVE_FATAL); } if ((h = __archive_read_ahead(a, skip, NULL)) == NULL) return (ARCHIVE_FATAL); p = h; skip += archive_le32dec(p + 7); } /* Skip over the 2-byte CRC at the beginning of the header. */ crc32_expected = archive_le16dec(p); __archive_read_consume(a, 2); skip -= 2; /* Skim the entire header and compute the CRC. */ crc32_val = 0; while (skip > 0) { size_t to_read = skip; ssize_t did_read; if (to_read > 32 * 1024) { to_read = 32 * 1024; } if ((h = __archive_read_ahead(a, to_read, &did_read)) == NULL) { return (ARCHIVE_FATAL); } p = h; crc32_val = crc32(crc32_val, (const unsigned char *)p, (unsigned)did_read); __archive_read_consume(a, did_read); skip -= did_read; } if ((crc32_val & 0xffff) != crc32_expected) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Header CRC error"); return (ARCHIVE_FATAL); } if (head_type == ENDARC_HEAD) return (ARCHIVE_EOF); break; case NEWSUB_HEAD: if ((ret = read_header(a, entry, head_type)) < ARCHIVE_WARN) return ret; break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Bad RAR file"); return (ARCHIVE_FATAL); } } } static int archive_read_format_rar_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct rar *rar = (struct rar *)(a->format->data); int ret; if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) { rar->has_encrypted_entries = 0; } if (rar->bytes_unconsumed > 0) { /* Consume as much as the decompressor actually used. */ __archive_read_consume(a, rar->bytes_unconsumed); rar->bytes_unconsumed = 0; } *buff = NULL; if (rar->entry_eof || rar->offset_seek >= rar->unp_size) { *size = 0; *offset = rar->offset; if (*offset < rar->unp_size) *offset = rar->unp_size; return (ARCHIVE_EOF); } switch (rar->compression_method) { case COMPRESS_METHOD_STORE: ret = read_data_stored(a, buff, size, offset); break; case COMPRESS_METHOD_FASTEST: case COMPRESS_METHOD_FAST: case COMPRESS_METHOD_NORMAL: case COMPRESS_METHOD_GOOD: case COMPRESS_METHOD_BEST: ret = read_data_compressed(a, buff, size, offset); if (ret != ARCHIVE_OK && ret != ARCHIVE_WARN) __archive_ppmd7_functions.Ppmd7_Free(&rar->ppmd7_context); break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported compression method for RAR file."); ret = ARCHIVE_FATAL; break; } return (ret); } static int archive_read_format_rar_read_data_skip(struct archive_read *a) { struct rar *rar; int64_t bytes_skipped; int ret; rar = (struct rar *)(a->format->data); if (rar->bytes_unconsumed > 0) { /* Consume as much as the decompressor actually used. */ __archive_read_consume(a, rar->bytes_unconsumed); rar->bytes_unconsumed = 0; } if (rar->bytes_remaining > 0) { bytes_skipped = __archive_read_consume(a, rar->bytes_remaining); if (bytes_skipped < 0) return (ARCHIVE_FATAL); } /* Compressed data to skip must be read from each header in a multivolume * archive. */ if (rar->main_flags & MHD_VOLUME && rar->file_flags & FHD_SPLIT_AFTER) { ret = archive_read_format_rar_read_header(a, a->entry); if (ret == (ARCHIVE_EOF)) ret = archive_read_format_rar_read_header(a, a->entry); if (ret != (ARCHIVE_OK)) return ret; return archive_read_format_rar_read_data_skip(a); } return (ARCHIVE_OK); } static int64_t archive_read_format_rar_seek_data(struct archive_read *a, int64_t offset, int whence) { int64_t client_offset, ret; unsigned int i; struct rar *rar = (struct rar *)(a->format->data); if (rar->compression_method == COMPRESS_METHOD_STORE) { /* Modify the offset for use with SEEK_SET */ switch (whence) { case SEEK_CUR: client_offset = rar->offset_seek; break; case SEEK_END: client_offset = rar->unp_size; break; case SEEK_SET: default: client_offset = 0; } client_offset += offset; if (client_offset < 0) { /* Can't seek past beginning of data block */ return -1; } else if (client_offset > rar->unp_size) { /* * Set the returned offset but only seek to the end of * the data block. */ rar->offset_seek = client_offset; client_offset = rar->unp_size; } client_offset += rar->dbo[0].start_offset; i = 0; while (i < rar->cursor) { i++; client_offset += rar->dbo[i].start_offset - rar->dbo[i-1].end_offset; } if (rar->main_flags & MHD_VOLUME) { /* Find the appropriate offset among the multivolume archive */ while (1) { if (client_offset < rar->dbo[rar->cursor].start_offset && rar->file_flags & FHD_SPLIT_BEFORE) { /* Search backwards for the correct data block */ if (rar->cursor == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Attempt to seek past beginning of RAR data block"); return (ARCHIVE_FAILED); } rar->cursor--; client_offset -= rar->dbo[rar->cursor+1].start_offset - rar->dbo[rar->cursor].end_offset; if (client_offset < rar->dbo[rar->cursor].start_offset) continue; ret = __archive_read_seek(a, rar->dbo[rar->cursor].start_offset - rar->dbo[rar->cursor].header_size, SEEK_SET); if (ret < (ARCHIVE_OK)) return ret; ret = archive_read_format_rar_read_header(a, a->entry); if (ret != (ARCHIVE_OK)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Error during seek of RAR file"); return (ARCHIVE_FAILED); } rar->cursor--; break; } else if (client_offset > rar->dbo[rar->cursor].end_offset && rar->file_flags & FHD_SPLIT_AFTER) { /* Search forward for the correct data block */ rar->cursor++; if (rar->cursor < rar->nodes && client_offset > rar->dbo[rar->cursor].end_offset) { client_offset += rar->dbo[rar->cursor].start_offset - rar->dbo[rar->cursor-1].end_offset; continue; } rar->cursor--; ret = __archive_read_seek(a, rar->dbo[rar->cursor].end_offset, SEEK_SET); if (ret < (ARCHIVE_OK)) return ret; ret = archive_read_format_rar_read_header(a, a->entry); if (ret == (ARCHIVE_EOF)) { rar->has_endarc_header = 1; ret = archive_read_format_rar_read_header(a, a->entry); } if (ret != (ARCHIVE_OK)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Error during seek of RAR file"); return (ARCHIVE_FAILED); } client_offset += rar->dbo[rar->cursor].start_offset - rar->dbo[rar->cursor-1].end_offset; continue; } break; } } ret = __archive_read_seek(a, client_offset, SEEK_SET); if (ret < (ARCHIVE_OK)) return ret; rar->bytes_remaining = rar->dbo[rar->cursor].end_offset - ret; i = rar->cursor; while (i > 0) { i--; ret -= rar->dbo[i+1].start_offset - rar->dbo[i].end_offset; } ret -= rar->dbo[0].start_offset; /* Always restart reading the file after a seek */ __archive_reset_read_data(&a->archive); rar->bytes_unconsumed = 0; rar->offset = 0; /* * If a seek past the end of file was requested, return the requested * offset. */ if (ret == rar->unp_size && rar->offset_seek > rar->unp_size) return rar->offset_seek; /* Return the new offset */ rar->offset_seek = ret; return rar->offset_seek; } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Seeking of compressed RAR files is unsupported"); } return (ARCHIVE_FAILED); } static int archive_read_format_rar_cleanup(struct archive_read *a) { struct rar *rar; rar = (struct rar *)(a->format->data); free_codes(a); free(rar->filename); free(rar->filename_save); free(rar->dbo); free(rar->unp_buffer); free(rar->lzss.window); __archive_ppmd7_functions.Ppmd7_Free(&rar->ppmd7_context); free(rar); (a->format->data) = NULL; return (ARCHIVE_OK); } static int read_header(struct archive_read *a, struct archive_entry *entry, char head_type) { const void *h; const char *p, *endp; struct rar *rar; struct rar_header rar_header; struct rar_file_header file_header; int64_t header_size; unsigned filename_size, end; char *filename; char *strp; char packed_size[8]; char unp_size[8]; int ttime; struct archive_string_conv *sconv, *fn_sconv; unsigned long crc32_val; int ret = (ARCHIVE_OK), ret2; rar = (struct rar *)(a->format->data); /* Setup a string conversion object for non-rar-unicode filenames. */ sconv = rar->opt_sconv; if (sconv == NULL) { if (!rar->init_default_conversion) { rar->sconv_default = archive_string_default_conversion_for_read( &(a->archive)); rar->init_default_conversion = 1; } sconv = rar->sconv_default; } if ((h = __archive_read_ahead(a, 7, NULL)) == NULL) return (ARCHIVE_FATAL); p = h; memcpy(&rar_header, p, sizeof(rar_header)); rar->file_flags = archive_le16dec(rar_header.flags); header_size = archive_le16dec(rar_header.size); if (header_size < (int64_t)sizeof(file_header) + 7) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid header size"); return (ARCHIVE_FATAL); } crc32_val = crc32(0, (const unsigned char *)p + 2, 7 - 2); __archive_read_consume(a, 7); if (!(rar->file_flags & FHD_SOLID)) { rar->compression_method = 0; rar->packed_size = 0; rar->unp_size = 0; rar->mtime = 0; rar->ctime = 0; rar->atime = 0; rar->arctime = 0; rar->mode = 0; memset(&rar->salt, 0, sizeof(rar->salt)); rar->atime = 0; rar->ansec = 0; rar->ctime = 0; rar->cnsec = 0; rar->mtime = 0; rar->mnsec = 0; rar->arctime = 0; rar->arcnsec = 0; } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "RAR solid archive support unavailable."); return (ARCHIVE_FATAL); } if ((h = __archive_read_ahead(a, (size_t)header_size - 7, NULL)) == NULL) return (ARCHIVE_FATAL); /* File Header CRC check. */ crc32_val = crc32(crc32_val, h, (unsigned)(header_size - 7)); if ((crc32_val & 0xffff) != archive_le16dec(rar_header.crc)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Header CRC error"); return (ARCHIVE_FATAL); } /* If no CRC error, Go on parsing File Header. */ p = h; endp = p + header_size - 7; memcpy(&file_header, p, sizeof(file_header)); p += sizeof(file_header); rar->compression_method = file_header.method; ttime = archive_le32dec(file_header.file_time); rar->mtime = get_time(ttime); rar->file_crc = archive_le32dec(file_header.file_crc); if (rar->file_flags & FHD_PASSWORD) { archive_entry_set_is_data_encrypted(entry, 1); rar->has_encrypted_entries = 1; archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "RAR encryption support unavailable."); /* Since it is only the data part itself that is encrypted we can at least extract information about the currently processed entry and don't need to return ARCHIVE_FATAL here. */ /*return (ARCHIVE_FATAL);*/ } if (rar->file_flags & FHD_LARGE) { memcpy(packed_size, file_header.pack_size, 4); memcpy(packed_size + 4, p, 4); /* High pack size */ p += 4; memcpy(unp_size, file_header.unp_size, 4); memcpy(unp_size + 4, p, 4); /* High unpack size */ p += 4; rar->packed_size = archive_le64dec(&packed_size); rar->unp_size = archive_le64dec(&unp_size); } else { rar->packed_size = archive_le32dec(file_header.pack_size); rar->unp_size = archive_le32dec(file_header.unp_size); } if (rar->packed_size < 0 || rar->unp_size < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid sizes specified."); return (ARCHIVE_FATAL); } rar->bytes_remaining = rar->packed_size; /* TODO: RARv3 subblocks contain comments. For now the complete block is * consumed at the end. */ if (head_type == NEWSUB_HEAD) { size_t distance = p - (const char *)h; header_size += rar->packed_size; /* Make sure we have the extended data. */ if ((h = __archive_read_ahead(a, (size_t)header_size - 7, NULL)) == NULL) return (ARCHIVE_FATAL); p = h; endp = p + header_size - 7; p += distance; } filename_size = archive_le16dec(file_header.name_size); if (p + filename_size > endp) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid filename size"); return (ARCHIVE_FATAL); } if (rar->filename_allocated < filename_size * 2 + 2) { char *newptr; size_t newsize = filename_size * 2 + 2; newptr = realloc(rar->filename, newsize); if (newptr == NULL) { archive_set_error(&a->archive, ENOMEM, "Couldn't allocate memory."); return (ARCHIVE_FATAL); } rar->filename = newptr; rar->filename_allocated = newsize; } filename = rar->filename; memcpy(filename, p, filename_size); filename[filename_size] = '\0'; if (rar->file_flags & FHD_UNICODE) { if (filename_size != strlen(filename)) { unsigned char highbyte, flagbits, flagbyte; unsigned fn_end, offset; end = filename_size; fn_end = filename_size * 2; filename_size = 0; offset = (unsigned)strlen(filename) + 1; highbyte = *(p + offset++); flagbits = 0; flagbyte = 0; while (offset < end && filename_size < fn_end) { if (!flagbits) { flagbyte = *(p + offset++); flagbits = 8; } flagbits -= 2; switch((flagbyte >> flagbits) & 3) { case 0: filename[filename_size++] = '\0'; filename[filename_size++] = *(p + offset++); break; case 1: filename[filename_size++] = highbyte; filename[filename_size++] = *(p + offset++); break; case 2: filename[filename_size++] = *(p + offset + 1); filename[filename_size++] = *(p + offset); offset += 2; break; case 3: { char extra, high; uint8_t length = *(p + offset++); if (length & 0x80) { extra = *(p + offset++); high = (char)highbyte; } else extra = high = 0; length = (length & 0x7f) + 2; while (length && filename_size < fn_end) { unsigned cp = filename_size >> 1; filename[filename_size++] = high; filename[filename_size++] = p[cp] + extra; length--; } } break; } } if (filename_size > fn_end) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid filename"); return (ARCHIVE_FATAL); } filename[filename_size++] = '\0'; /* * Do not increment filename_size here as the computations below * add the space for the terminating NUL explicitly. */ filename[filename_size] = '\0'; /* Decoded unicode form is UTF-16BE, so we have to update a string * conversion object for it. */ if (rar->sconv_utf16be == NULL) { rar->sconv_utf16be = archive_string_conversion_from_charset( &a->archive, "UTF-16BE", 1); if (rar->sconv_utf16be == NULL) return (ARCHIVE_FATAL); } fn_sconv = rar->sconv_utf16be; strp = filename; while (memcmp(strp, "\x00\x00", 2)) { if (!memcmp(strp, "\x00\\", 2)) *(strp + 1) = '/'; strp += 2; } p += offset; } else { /* * If FHD_UNICODE is set but no unicode data, this file name form * is UTF-8, so we have to update a string conversion object for * it accordingly. */ if (rar->sconv_utf8 == NULL) { rar->sconv_utf8 = archive_string_conversion_from_charset( &a->archive, "UTF-8", 1); if (rar->sconv_utf8 == NULL) return (ARCHIVE_FATAL); } fn_sconv = rar->sconv_utf8; while ((strp = strchr(filename, '\\')) != NULL) *strp = '/'; p += filename_size; } } else { fn_sconv = sconv; while ((strp = strchr(filename, '\\')) != NULL) *strp = '/'; p += filename_size; } /* Split file in multivolume RAR. No more need to process header. */ if (rar->filename_save && filename_size == rar->filename_save_size && !memcmp(rar->filename, rar->filename_save, filename_size + 1)) { __archive_read_consume(a, header_size - 7); rar->cursor++; if (rar->cursor >= rar->nodes) { rar->nodes++; if ((rar->dbo = realloc(rar->dbo, sizeof(*rar->dbo) * rar->nodes)) == NULL) { archive_set_error(&a->archive, ENOMEM, "Couldn't allocate memory."); return (ARCHIVE_FATAL); } rar->dbo[rar->cursor].header_size = header_size; rar->dbo[rar->cursor].start_offset = -1; rar->dbo[rar->cursor].end_offset = -1; } if (rar->dbo[rar->cursor].start_offset < 0) { rar->dbo[rar->cursor].start_offset = a->filter->position; rar->dbo[rar->cursor].end_offset = rar->dbo[rar->cursor].start_offset + rar->packed_size; } return ret; } + else if (rar->filename_must_match) + { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Mismatch of file parts split across multi-volume archive"); + return (ARCHIVE_FATAL); + } rar->filename_save = (char*)realloc(rar->filename_save, filename_size + 1); memcpy(rar->filename_save, rar->filename, filename_size + 1); rar->filename_save_size = filename_size; /* Set info for seeking */ free(rar->dbo); if ((rar->dbo = calloc(1, sizeof(*rar->dbo))) == NULL) { archive_set_error(&a->archive, ENOMEM, "Couldn't allocate memory."); return (ARCHIVE_FATAL); } rar->dbo[0].header_size = header_size; rar->dbo[0].start_offset = -1; rar->dbo[0].end_offset = -1; rar->cursor = 0; rar->nodes = 1; if (rar->file_flags & FHD_SALT) { if (p + 8 > endp) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid header size"); return (ARCHIVE_FATAL); } memcpy(rar->salt, p, 8); p += 8; } if (rar->file_flags & FHD_EXTTIME) { if (read_exttime(p, rar, endp) < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid header size"); return (ARCHIVE_FATAL); } } __archive_read_consume(a, header_size - 7); rar->dbo[0].start_offset = a->filter->position; rar->dbo[0].end_offset = rar->dbo[0].start_offset + rar->packed_size; switch(file_header.host_os) { case OS_MSDOS: case OS_OS2: case OS_WIN32: rar->mode = archive_le32dec(file_header.file_attr); if (rar->mode & FILE_ATTRIBUTE_DIRECTORY) rar->mode = AE_IFDIR | S_IXUSR | S_IXGRP | S_IXOTH; else rar->mode = AE_IFREG; rar->mode |= S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; break; case OS_UNIX: case OS_MAC_OS: case OS_BEOS: rar->mode = archive_le32dec(file_header.file_attr); break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unknown file attributes from RAR file's host OS"); return (ARCHIVE_FATAL); } rar->bytes_uncopied = rar->bytes_unconsumed = 0; rar->lzss.position = rar->offset = 0; rar->offset_seek = 0; rar->dictionary_size = 0; rar->offset_outgoing = 0; rar->br.cache_avail = 0; rar->br.avail_in = 0; rar->crc_calculated = 0; rar->entry_eof = 0; rar->valid = 1; rar->is_ppmd_block = 0; rar->start_new_table = 1; free(rar->unp_buffer); rar->unp_buffer = NULL; rar->unp_offset = 0; rar->unp_buffer_size = UNP_BUFFER_SIZE; memset(rar->lengthtable, 0, sizeof(rar->lengthtable)); __archive_ppmd7_functions.Ppmd7_Free(&rar->ppmd7_context); rar->ppmd_valid = rar->ppmd_eod = 0; /* Don't set any archive entries for non-file header types */ if (head_type == NEWSUB_HEAD) return ret; archive_entry_set_mtime(entry, rar->mtime, rar->mnsec); archive_entry_set_ctime(entry, rar->ctime, rar->cnsec); archive_entry_set_atime(entry, rar->atime, rar->ansec); archive_entry_set_size(entry, rar->unp_size); archive_entry_set_mode(entry, rar->mode); if (archive_entry_copy_pathname_l(entry, filename, filename_size, fn_sconv)) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Pathname"); return (ARCHIVE_FATAL); } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Pathname cannot be converted from %s to current locale.", archive_string_conversion_charset_name(fn_sconv)); ret = (ARCHIVE_WARN); } if (((rar->mode) & AE_IFMT) == AE_IFLNK) { /* Make sure a symbolic-link file does not have its body. */ rar->bytes_remaining = 0; archive_entry_set_size(entry, 0); /* Read a symbolic-link name. */ if ((ret2 = read_symlink_stored(a, entry, sconv)) < (ARCHIVE_WARN)) return ret2; if (ret > ret2) ret = ret2; } if (rar->bytes_remaining == 0) rar->entry_eof = 1; return ret; } static time_t get_time(int ttime) { struct tm tm; tm.tm_sec = 2 * (ttime & 0x1f); tm.tm_min = (ttime >> 5) & 0x3f; tm.tm_hour = (ttime >> 11) & 0x1f; tm.tm_mday = (ttime >> 16) & 0x1f; tm.tm_mon = ((ttime >> 21) & 0x0f) - 1; tm.tm_year = ((ttime >> 25) & 0x7f) + 80; tm.tm_isdst = -1; return mktime(&tm); } static int read_exttime(const char *p, struct rar *rar, const char *endp) { unsigned rmode, flags, rem, j, count; int ttime, i; struct tm *tm; time_t t; long nsec; if (p + 2 > endp) return (-1); flags = archive_le16dec(p); p += 2; for (i = 3; i >= 0; i--) { t = 0; if (i == 3) t = rar->mtime; rmode = flags >> i * 4; if (rmode & 8) { if (!t) { if (p + 4 > endp) return (-1); ttime = archive_le32dec(p); t = get_time(ttime); p += 4; } rem = 0; count = rmode & 3; if (p + count > endp) return (-1); for (j = 0; j < count; j++) { rem = (((unsigned)(unsigned char)*p) << 16) | (rem >> 8); p++; } tm = localtime(&t); nsec = tm->tm_sec + rem / NS_UNIT; if (rmode & 4) { tm->tm_sec++; t = mktime(tm); } if (i == 3) { rar->mtime = t; rar->mnsec = nsec; } else if (i == 2) { rar->ctime = t; rar->cnsec = nsec; } else if (i == 1) { rar->atime = t; rar->ansec = nsec; } else { rar->arctime = t; rar->arcnsec = nsec; } } } return (0); } static int read_symlink_stored(struct archive_read *a, struct archive_entry *entry, struct archive_string_conv *sconv) { const void *h; const char *p; struct rar *rar; int ret = (ARCHIVE_OK); rar = (struct rar *)(a->format->data); if ((h = rar_read_ahead(a, (size_t)rar->packed_size, NULL)) == NULL) return (ARCHIVE_FATAL); p = h; if (archive_entry_copy_symlink_l(entry, p, (size_t)rar->packed_size, sconv)) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for link"); return (ARCHIVE_FATAL); } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "link cannot be converted from %s to current locale.", archive_string_conversion_charset_name(sconv)); ret = (ARCHIVE_WARN); } __archive_read_consume(a, rar->packed_size); return ret; } static int read_data_stored(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct rar *rar; ssize_t bytes_avail; rar = (struct rar *)(a->format->data); if (rar->bytes_remaining == 0 && !(rar->main_flags & MHD_VOLUME && rar->file_flags & FHD_SPLIT_AFTER)) { *buff = NULL; *size = 0; *offset = rar->offset; if (rar->file_crc != rar->crc_calculated) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "File CRC error"); return (ARCHIVE_FATAL); } rar->entry_eof = 1; return (ARCHIVE_EOF); } *buff = rar_read_ahead(a, 1, &bytes_avail); if (bytes_avail <= 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated RAR file data"); return (ARCHIVE_FATAL); } *size = bytes_avail; *offset = rar->offset; rar->offset += bytes_avail; rar->offset_seek += bytes_avail; rar->bytes_remaining -= bytes_avail; rar->bytes_unconsumed = bytes_avail; /* Calculate File CRC. */ rar->crc_calculated = crc32(rar->crc_calculated, *buff, (unsigned)bytes_avail); return (ARCHIVE_OK); } static int read_data_compressed(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct rar *rar; int64_t start, end, actualend; size_t bs; int ret = (ARCHIVE_OK), sym, code, lzss_offset, length, i; rar = (struct rar *)(a->format->data); do { if (!rar->valid) return (ARCHIVE_FATAL); if (rar->ppmd_eod || (rar->dictionary_size && rar->offset >= rar->unp_size)) { if (rar->unp_offset > 0) { /* * We have unprocessed extracted data. write it out. */ *buff = rar->unp_buffer; *size = rar->unp_offset; *offset = rar->offset_outgoing; rar->offset_outgoing += *size; /* Calculate File CRC. */ rar->crc_calculated = crc32(rar->crc_calculated, *buff, (unsigned)*size); rar->unp_offset = 0; return (ARCHIVE_OK); } *buff = NULL; *size = 0; *offset = rar->offset; if (rar->file_crc != rar->crc_calculated) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "File CRC error"); return (ARCHIVE_FATAL); } rar->entry_eof = 1; return (ARCHIVE_EOF); } if (!rar->is_ppmd_block && rar->dictionary_size && rar->bytes_uncopied > 0) { if (rar->bytes_uncopied > (rar->unp_buffer_size - rar->unp_offset)) bs = rar->unp_buffer_size - rar->unp_offset; else bs = (size_t)rar->bytes_uncopied; ret = copy_from_lzss_window(a, buff, rar->offset, (int)bs); if (ret != ARCHIVE_OK) return (ret); rar->offset += bs; rar->bytes_uncopied -= bs; if (*buff != NULL) { rar->unp_offset = 0; *size = rar->unp_buffer_size; *offset = rar->offset_outgoing; rar->offset_outgoing += *size; /* Calculate File CRC. */ rar->crc_calculated = crc32(rar->crc_calculated, *buff, (unsigned)*size); return (ret); } continue; } if (!rar->br.next_in && (ret = rar_br_preparation(a, &(rar->br))) < ARCHIVE_WARN) return (ret); if (rar->start_new_table && ((ret = parse_codes(a)) < (ARCHIVE_WARN))) return (ret); if (rar->is_ppmd_block) { if ((sym = __archive_ppmd7_functions.Ppmd7_DecodeSymbol( &rar->ppmd7_context, &rar->range_dec.p)) < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid symbol"); return (ARCHIVE_FATAL); } if(sym != rar->ppmd_escape) { lzss_emit_literal(rar, sym); rar->bytes_uncopied++; } else { if ((code = __archive_ppmd7_functions.Ppmd7_DecodeSymbol( &rar->ppmd7_context, &rar->range_dec.p)) < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid symbol"); return (ARCHIVE_FATAL); } switch(code) { case 0: rar->start_new_table = 1; return read_data_compressed(a, buff, size, offset); case 2: rar->ppmd_eod = 1;/* End Of ppmd Data. */ continue; case 3: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Parsing filters is unsupported."); return (ARCHIVE_FAILED); case 4: lzss_offset = 0; for (i = 2; i >= 0; i--) { if ((code = __archive_ppmd7_functions.Ppmd7_DecodeSymbol( &rar->ppmd7_context, &rar->range_dec.p)) < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid symbol"); return (ARCHIVE_FATAL); } lzss_offset |= code << (i * 8); } if ((length = __archive_ppmd7_functions.Ppmd7_DecodeSymbol( &rar->ppmd7_context, &rar->range_dec.p)) < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid symbol"); return (ARCHIVE_FATAL); } lzss_emit_match(rar, lzss_offset + 2, length + 32); rar->bytes_uncopied += length + 32; break; case 5: if ((length = __archive_ppmd7_functions.Ppmd7_DecodeSymbol( &rar->ppmd7_context, &rar->range_dec.p)) < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid symbol"); return (ARCHIVE_FATAL); } lzss_emit_match(rar, 1, length + 4); rar->bytes_uncopied += length + 4; break; default: lzss_emit_literal(rar, sym); rar->bytes_uncopied++; } } } else { start = rar->offset; end = start + rar->dictionary_size; rar->filterstart = INT64_MAX; if ((actualend = expand(a, end)) < 0) return ((int)actualend); rar->bytes_uncopied = actualend - start; if (rar->bytes_uncopied == 0) { /* Broken RAR files cause this case. * NOTE: If this case were possible on a normal RAR file * we would find out where it was actually bad and * what we would do to solve it. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Internal error extracting RAR file"); return (ARCHIVE_FATAL); } } if (rar->bytes_uncopied > (rar->unp_buffer_size - rar->unp_offset)) bs = rar->unp_buffer_size - rar->unp_offset; else bs = (size_t)rar->bytes_uncopied; ret = copy_from_lzss_window(a, buff, rar->offset, (int)bs); if (ret != ARCHIVE_OK) return (ret); rar->offset += bs; rar->bytes_uncopied -= bs; /* * If *buff is NULL, it means unp_buffer is not full. * So we have to continue extracting a RAR file. */ } while (*buff == NULL); rar->unp_offset = 0; *size = rar->unp_buffer_size; *offset = rar->offset_outgoing; rar->offset_outgoing += *size; /* Calculate File CRC. */ rar->crc_calculated = crc32(rar->crc_calculated, *buff, (unsigned)*size); return ret; } static int parse_codes(struct archive_read *a) { int i, j, val, n, r; unsigned char bitlengths[MAX_SYMBOLS], zerocount, ppmd_flags; unsigned int maxorder; struct huffman_code precode; struct rar *rar = (struct rar *)(a->format->data); struct rar_br *br = &(rar->br); free_codes(a); /* Skip to the next byte */ rar_br_consume_unalined_bits(br); /* PPMd block flag */ if (!rar_br_read_ahead(a, br, 1)) goto truncated_data; if ((rar->is_ppmd_block = rar_br_bits(br, 1)) != 0) { rar_br_consume(br, 1); if (!rar_br_read_ahead(a, br, 7)) goto truncated_data; ppmd_flags = rar_br_bits(br, 7); rar_br_consume(br, 7); /* Memory is allocated in MB */ if (ppmd_flags & 0x20) { if (!rar_br_read_ahead(a, br, 8)) goto truncated_data; rar->dictionary_size = (rar_br_bits(br, 8) + 1) << 20; rar_br_consume(br, 8); } if (ppmd_flags & 0x40) { if (!rar_br_read_ahead(a, br, 8)) goto truncated_data; rar->ppmd_escape = rar->ppmd7_context.InitEsc = rar_br_bits(br, 8); rar_br_consume(br, 8); } else rar->ppmd_escape = 2; if (ppmd_flags & 0x20) { maxorder = (ppmd_flags & 0x1F) + 1; if(maxorder > 16) maxorder = 16 + (maxorder - 16) * 3; if (maxorder == 1) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated RAR file data"); return (ARCHIVE_FATAL); } /* Make sure ppmd7_contest is freed before Ppmd7_Construct * because reading a broken file cause this abnormal sequence. */ __archive_ppmd7_functions.Ppmd7_Free(&rar->ppmd7_context); rar->bytein.a = a; rar->bytein.Read = &ppmd_read; __archive_ppmd7_functions.PpmdRAR_RangeDec_CreateVTable(&rar->range_dec); rar->range_dec.Stream = &rar->bytein; __archive_ppmd7_functions.Ppmd7_Construct(&rar->ppmd7_context); if (rar->dictionary_size == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid zero dictionary size"); return (ARCHIVE_FATAL); } if (!__archive_ppmd7_functions.Ppmd7_Alloc(&rar->ppmd7_context, rar->dictionary_size)) { archive_set_error(&a->archive, ENOMEM, "Out of memory"); return (ARCHIVE_FATAL); } if (!__archive_ppmd7_functions.PpmdRAR_RangeDec_Init(&rar->range_dec)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unable to initialize PPMd range decoder"); return (ARCHIVE_FATAL); } __archive_ppmd7_functions.Ppmd7_Init(&rar->ppmd7_context, maxorder); rar->ppmd_valid = 1; } else { if (!rar->ppmd_valid) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid PPMd sequence"); return (ARCHIVE_FATAL); } if (!__archive_ppmd7_functions.PpmdRAR_RangeDec_Init(&rar->range_dec)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unable to initialize PPMd range decoder"); return (ARCHIVE_FATAL); } } } else { rar_br_consume(br, 1); /* Keep existing table flag */ if (!rar_br_read_ahead(a, br, 1)) goto truncated_data; if (!rar_br_bits(br, 1)) memset(rar->lengthtable, 0, sizeof(rar->lengthtable)); rar_br_consume(br, 1); memset(&bitlengths, 0, sizeof(bitlengths)); for (i = 0; i < MAX_SYMBOLS;) { if (!rar_br_read_ahead(a, br, 4)) goto truncated_data; bitlengths[i++] = rar_br_bits(br, 4); rar_br_consume(br, 4); if (bitlengths[i-1] == 0xF) { if (!rar_br_read_ahead(a, br, 4)) goto truncated_data; zerocount = rar_br_bits(br, 4); rar_br_consume(br, 4); if (zerocount) { i--; for (j = 0; j < zerocount + 2 && i < MAX_SYMBOLS; j++) bitlengths[i++] = 0; } } } memset(&precode, 0, sizeof(precode)); r = create_code(a, &precode, bitlengths, MAX_SYMBOLS, MAX_SYMBOL_LENGTH); if (r != ARCHIVE_OK) { free(precode.tree); free(precode.table); return (r); } for (i = 0; i < HUFFMAN_TABLE_SIZE;) { if ((val = read_next_symbol(a, &precode)) < 0) { free(precode.tree); free(precode.table); return (ARCHIVE_FATAL); } if (val < 16) { rar->lengthtable[i] = (rar->lengthtable[i] + val) & 0xF; i++; } else if (val < 18) { if (i == 0) { free(precode.tree); free(precode.table); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Internal error extracting RAR file."); return (ARCHIVE_FATAL); } if(val == 16) { if (!rar_br_read_ahead(a, br, 3)) { free(precode.tree); free(precode.table); goto truncated_data; } n = rar_br_bits(br, 3) + 3; rar_br_consume(br, 3); } else { if (!rar_br_read_ahead(a, br, 7)) { free(precode.tree); free(precode.table); goto truncated_data; } n = rar_br_bits(br, 7) + 11; rar_br_consume(br, 7); } for (j = 0; j < n && i < HUFFMAN_TABLE_SIZE; j++) { rar->lengthtable[i] = rar->lengthtable[i-1]; i++; } } else { if(val == 18) { if (!rar_br_read_ahead(a, br, 3)) { free(precode.tree); free(precode.table); goto truncated_data; } n = rar_br_bits(br, 3) + 3; rar_br_consume(br, 3); } else { if (!rar_br_read_ahead(a, br, 7)) { free(precode.tree); free(precode.table); goto truncated_data; } n = rar_br_bits(br, 7) + 11; rar_br_consume(br, 7); } for(j = 0; j < n && i < HUFFMAN_TABLE_SIZE; j++) rar->lengthtable[i++] = 0; } } free(precode.tree); free(precode.table); r = create_code(a, &rar->maincode, &rar->lengthtable[0], MAINCODE_SIZE, MAX_SYMBOL_LENGTH); if (r != ARCHIVE_OK) return (r); r = create_code(a, &rar->offsetcode, &rar->lengthtable[MAINCODE_SIZE], OFFSETCODE_SIZE, MAX_SYMBOL_LENGTH); if (r != ARCHIVE_OK) return (r); r = create_code(a, &rar->lowoffsetcode, &rar->lengthtable[MAINCODE_SIZE + OFFSETCODE_SIZE], LOWOFFSETCODE_SIZE, MAX_SYMBOL_LENGTH); if (r != ARCHIVE_OK) return (r); r = create_code(a, &rar->lengthcode, &rar->lengthtable[MAINCODE_SIZE + OFFSETCODE_SIZE + LOWOFFSETCODE_SIZE], LENGTHCODE_SIZE, MAX_SYMBOL_LENGTH); if (r != ARCHIVE_OK) return (r); } if (!rar->dictionary_size || !rar->lzss.window) { /* Seems as though dictionary sizes are not used. Even so, minimize * memory usage as much as possible. */ void *new_window; unsigned int new_size; if (rar->unp_size >= DICTIONARY_MAX_SIZE) new_size = DICTIONARY_MAX_SIZE; else new_size = rar_fls((unsigned int)rar->unp_size) << 1; + if (new_size == 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Zero window size is invalid."); + return (ARCHIVE_FATAL); + } new_window = realloc(rar->lzss.window, new_size); if (new_window == NULL) { archive_set_error(&a->archive, ENOMEM, "Unable to allocate memory for uncompressed data."); return (ARCHIVE_FATAL); } rar->lzss.window = (unsigned char *)new_window; rar->dictionary_size = new_size; memset(rar->lzss.window, 0, rar->dictionary_size); rar->lzss.mask = rar->dictionary_size - 1; } rar->start_new_table = 0; return (ARCHIVE_OK); truncated_data: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated RAR file data"); rar->valid = 0; return (ARCHIVE_FATAL); } static void free_codes(struct archive_read *a) { struct rar *rar = (struct rar *)(a->format->data); free(rar->maincode.tree); free(rar->offsetcode.tree); free(rar->lowoffsetcode.tree); free(rar->lengthcode.tree); free(rar->maincode.table); free(rar->offsetcode.table); free(rar->lowoffsetcode.table); free(rar->lengthcode.table); memset(&rar->maincode, 0, sizeof(rar->maincode)); memset(&rar->offsetcode, 0, sizeof(rar->offsetcode)); memset(&rar->lowoffsetcode, 0, sizeof(rar->lowoffsetcode)); memset(&rar->lengthcode, 0, sizeof(rar->lengthcode)); } static int read_next_symbol(struct archive_read *a, struct huffman_code *code) { unsigned char bit; unsigned int bits; int length, value, node; struct rar *rar; struct rar_br *br; if (!code->table) { if (make_table(a, code) != (ARCHIVE_OK)) return -1; } rar = (struct rar *)(a->format->data); br = &(rar->br); /* Look ahead (peek) at bits */ if (!rar_br_read_ahead(a, br, code->tablesize)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated RAR file data"); rar->valid = 0; return -1; } bits = rar_br_bits(br, code->tablesize); length = code->table[bits].length; value = code->table[bits].value; if (length < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid prefix code in bitstream"); return -1; } if (length <= code->tablesize) { /* Skip length bits */ rar_br_consume(br, length); return value; } /* Skip tablesize bits */ rar_br_consume(br, code->tablesize); node = value; while (!(code->tree[node].branches[0] == code->tree[node].branches[1])) { if (!rar_br_read_ahead(a, br, 1)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated RAR file data"); rar->valid = 0; return -1; } bit = rar_br_bits(br, 1); rar_br_consume(br, 1); if (code->tree[node].branches[bit] < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid prefix code in bitstream"); return -1; } node = code->tree[node].branches[bit]; } return code->tree[node].branches[0]; } static int create_code(struct archive_read *a, struct huffman_code *code, unsigned char *lengths, int numsymbols, char maxlength) { int i, j, codebits = 0, symbolsleft = numsymbols; code->numentries = 0; code->numallocatedentries = 0; if (new_node(code) < 0) { archive_set_error(&a->archive, ENOMEM, "Unable to allocate memory for node data."); return (ARCHIVE_FATAL); } code->numentries = 1; code->minlength = INT_MAX; code->maxlength = INT_MIN; codebits = 0; for(i = 1; i <= maxlength; i++) { for(j = 0; j < numsymbols; j++) { if (lengths[j] != i) continue; if (add_value(a, code, j, codebits, i) != ARCHIVE_OK) return (ARCHIVE_FATAL); codebits++; if (--symbolsleft <= 0) { break; break; } } codebits <<= 1; } return (ARCHIVE_OK); } static int add_value(struct archive_read *a, struct huffman_code *code, int value, int codebits, int length) { int repeatpos, lastnode, bitpos, bit, repeatnode, nextnode; free(code->table); code->table = NULL; if(length > code->maxlength) code->maxlength = length; if(length < code->minlength) code->minlength = length; repeatpos = -1; if (repeatpos == 0 || (repeatpos >= 0 && (((codebits >> (repeatpos - 1)) & 3) == 0 || ((codebits >> (repeatpos - 1)) & 3) == 3))) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid repeat position"); return (ARCHIVE_FATAL); } lastnode = 0; for (bitpos = length - 1; bitpos >= 0; bitpos--) { bit = (codebits >> bitpos) & 1; /* Leaf node check */ if (code->tree[lastnode].branches[0] == code->tree[lastnode].branches[1]) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Prefix found"); return (ARCHIVE_FATAL); } if (bitpos == repeatpos) { /* Open branch check */ if (!(code->tree[lastnode].branches[bit] < 0)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid repeating code"); return (ARCHIVE_FATAL); } if ((repeatnode = new_node(code)) < 0) { archive_set_error(&a->archive, ENOMEM, "Unable to allocate memory for node data."); return (ARCHIVE_FATAL); } if ((nextnode = new_node(code)) < 0) { archive_set_error(&a->archive, ENOMEM, "Unable to allocate memory for node data."); return (ARCHIVE_FATAL); } /* Set branches */ code->tree[lastnode].branches[bit] = repeatnode; code->tree[repeatnode].branches[bit] = repeatnode; code->tree[repeatnode].branches[bit^1] = nextnode; lastnode = nextnode; bitpos++; /* terminating bit already handled, skip it */ } else { /* Open branch check */ if (code->tree[lastnode].branches[bit] < 0) { if (new_node(code) < 0) { archive_set_error(&a->archive, ENOMEM, "Unable to allocate memory for node data."); return (ARCHIVE_FATAL); } code->tree[lastnode].branches[bit] = code->numentries++; } /* set to branch */ lastnode = code->tree[lastnode].branches[bit]; } } if (!(code->tree[lastnode].branches[0] == -1 && code->tree[lastnode].branches[1] == -2)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Prefix found"); return (ARCHIVE_FATAL); } /* Set leaf value */ code->tree[lastnode].branches[0] = value; code->tree[lastnode].branches[1] = value; return (ARCHIVE_OK); } static int new_node(struct huffman_code *code) { void *new_tree; if (code->numallocatedentries == code->numentries) { int new_num_entries = 256; if (code->numentries > 0) { new_num_entries = code->numentries * 2; } new_tree = realloc(code->tree, new_num_entries * sizeof(*code->tree)); if (new_tree == NULL) return (-1); code->tree = (struct huffman_tree_node *)new_tree; code->numallocatedentries = new_num_entries; } code->tree[code->numentries].branches[0] = -1; code->tree[code->numentries].branches[1] = -2; return 1; } static int make_table(struct archive_read *a, struct huffman_code *code) { if (code->maxlength < code->minlength || code->maxlength > 10) code->tablesize = 10; else code->tablesize = code->maxlength; code->table = (struct huffman_table_entry *)calloc(1, sizeof(*code->table) * ((size_t)1 << code->tablesize)); return make_table_recurse(a, code, 0, code->table, 0, code->tablesize); } static int make_table_recurse(struct archive_read *a, struct huffman_code *code, int node, struct huffman_table_entry *table, int depth, int maxdepth) { int currtablesize, i, ret = (ARCHIVE_OK); if (!code->tree) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Huffman tree was not created."); return (ARCHIVE_FATAL); } if (node < 0 || node >= code->numentries) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid location to Huffman tree specified."); return (ARCHIVE_FATAL); } currtablesize = 1 << (maxdepth - depth); if (code->tree[node].branches[0] == code->tree[node].branches[1]) { for(i = 0; i < currtablesize; i++) { table[i].length = depth; table[i].value = code->tree[node].branches[0]; } } else if (node < 0) { for(i = 0; i < currtablesize; i++) table[i].length = -1; } else { if(depth == maxdepth) { table[0].length = maxdepth + 1; table[0].value = node; } else { ret |= make_table_recurse(a, code, code->tree[node].branches[0], table, depth + 1, maxdepth); ret |= make_table_recurse(a, code, code->tree[node].branches[1], table + currtablesize / 2, depth + 1, maxdepth); } } return ret; } static int64_t expand(struct archive_read *a, int64_t end) { static const unsigned char lengthbases[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224 }; static const unsigned char lengthbits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 }; static const unsigned int offsetbases[] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608, 262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040, 1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872, 3670016, 3932160 }; static const unsigned char offsetbits[] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18 }; static const unsigned char shortbases[] = { 0, 4, 8, 16, 32, 64, 128, 192 }; static const unsigned char shortbits[] = { 2, 2, 3, 4, 5, 6, 6, 6 }; int symbol, offs, len, offsindex, lensymbol, i, offssymbol, lowoffsetsymbol; unsigned char newfile; struct rar *rar = (struct rar *)(a->format->data); struct rar_br *br = &(rar->br); if (rar->filterstart < end) end = rar->filterstart; while (1) { if (rar->output_last_match && lzss_position(&rar->lzss) + rar->lastlength <= end) { lzss_emit_match(rar, rar->lastoffset, rar->lastlength); rar->output_last_match = 0; } if(rar->is_ppmd_block || rar->output_last_match || lzss_position(&rar->lzss) >= end) return lzss_position(&rar->lzss); if ((symbol = read_next_symbol(a, &rar->maincode)) < 0) return (ARCHIVE_FATAL); rar->output_last_match = 0; if (symbol < 256) { lzss_emit_literal(rar, symbol); continue; } else if (symbol == 256) { if (!rar_br_read_ahead(a, br, 1)) goto truncated_data; newfile = !rar_br_bits(br, 1); rar_br_consume(br, 1); if(newfile) { rar->start_new_block = 1; if (!rar_br_read_ahead(a, br, 1)) goto truncated_data; rar->start_new_table = rar_br_bits(br, 1); rar_br_consume(br, 1); return lzss_position(&rar->lzss); } else { if (parse_codes(a) != ARCHIVE_OK) return (ARCHIVE_FATAL); continue; } } else if(symbol==257) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Parsing filters is unsupported."); return (ARCHIVE_FAILED); } else if(symbol==258) { if(rar->lastlength == 0) continue; offs = rar->lastoffset; len = rar->lastlength; } else if (symbol <= 262) { offsindex = symbol - 259; offs = rar->oldoffset[offsindex]; if ((lensymbol = read_next_symbol(a, &rar->lengthcode)) < 0) goto bad_data; if (lensymbol > (int)(sizeof(lengthbases)/sizeof(lengthbases[0]))) goto bad_data; if (lensymbol > (int)(sizeof(lengthbits)/sizeof(lengthbits[0]))) goto bad_data; len = lengthbases[lensymbol] + 2; if (lengthbits[lensymbol] > 0) { if (!rar_br_read_ahead(a, br, lengthbits[lensymbol])) goto truncated_data; len += rar_br_bits(br, lengthbits[lensymbol]); rar_br_consume(br, lengthbits[lensymbol]); } for (i = offsindex; i > 0; i--) rar->oldoffset[i] = rar->oldoffset[i-1]; rar->oldoffset[0] = offs; } else if(symbol<=270) { offs = shortbases[symbol-263] + 1; if(shortbits[symbol-263] > 0) { if (!rar_br_read_ahead(a, br, shortbits[symbol-263])) goto truncated_data; offs += rar_br_bits(br, shortbits[symbol-263]); rar_br_consume(br, shortbits[symbol-263]); } len = 2; for(i = 3; i > 0; i--) rar->oldoffset[i] = rar->oldoffset[i-1]; rar->oldoffset[0] = offs; } else { if (symbol-271 > (int)(sizeof(lengthbases)/sizeof(lengthbases[0]))) goto bad_data; if (symbol-271 > (int)(sizeof(lengthbits)/sizeof(lengthbits[0]))) goto bad_data; len = lengthbases[symbol-271]+3; if(lengthbits[symbol-271] > 0) { if (!rar_br_read_ahead(a, br, lengthbits[symbol-271])) goto truncated_data; len += rar_br_bits(br, lengthbits[symbol-271]); rar_br_consume(br, lengthbits[symbol-271]); } if ((offssymbol = read_next_symbol(a, &rar->offsetcode)) < 0) goto bad_data; if (offssymbol > (int)(sizeof(offsetbases)/sizeof(offsetbases[0]))) goto bad_data; if (offssymbol > (int)(sizeof(offsetbits)/sizeof(offsetbits[0]))) goto bad_data; offs = offsetbases[offssymbol]+1; if(offsetbits[offssymbol] > 0) { if(offssymbol > 9) { if(offsetbits[offssymbol] > 4) { if (!rar_br_read_ahead(a, br, offsetbits[offssymbol] - 4)) goto truncated_data; offs += rar_br_bits(br, offsetbits[offssymbol] - 4) << 4; rar_br_consume(br, offsetbits[offssymbol] - 4); } if(rar->numlowoffsetrepeats > 0) { rar->numlowoffsetrepeats--; offs += rar->lastlowoffset; } else { if ((lowoffsetsymbol = read_next_symbol(a, &rar->lowoffsetcode)) < 0) return (ARCHIVE_FATAL); if(lowoffsetsymbol == 16) { rar->numlowoffsetrepeats = 15; offs += rar->lastlowoffset; } else { offs += lowoffsetsymbol; rar->lastlowoffset = lowoffsetsymbol; } } } else { if (!rar_br_read_ahead(a, br, offsetbits[offssymbol])) goto truncated_data; offs += rar_br_bits(br, offsetbits[offssymbol]); rar_br_consume(br, offsetbits[offssymbol]); } } if (offs >= 0x40000) len++; if (offs >= 0x2000) len++; for(i = 3; i > 0; i--) rar->oldoffset[i] = rar->oldoffset[i-1]; rar->oldoffset[0] = offs; } rar->lastoffset = offs; rar->lastlength = len; rar->output_last_match = 1; } truncated_data: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated RAR file data"); rar->valid = 0; return (ARCHIVE_FATAL); bad_data: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Bad RAR file data"); return (ARCHIVE_FATAL); } static int copy_from_lzss_window(struct archive_read *a, const void **buffer, int64_t startpos, int length) { int windowoffs, firstpart; struct rar *rar = (struct rar *)(a->format->data); if (!rar->unp_buffer) { if ((rar->unp_buffer = malloc(rar->unp_buffer_size)) == NULL) { archive_set_error(&a->archive, ENOMEM, "Unable to allocate memory for uncompressed data."); return (ARCHIVE_FATAL); } } windowoffs = lzss_offset_for_position(&rar->lzss, startpos); if(windowoffs + length <= lzss_size(&rar->lzss)) { memcpy(&rar->unp_buffer[rar->unp_offset], &rar->lzss.window[windowoffs], length); } else if (length <= lzss_size(&rar->lzss)) { firstpart = lzss_size(&rar->lzss) - windowoffs; if (firstpart < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Bad RAR file data"); return (ARCHIVE_FATAL); } if (firstpart < length) { memcpy(&rar->unp_buffer[rar->unp_offset], &rar->lzss.window[windowoffs], firstpart); memcpy(&rar->unp_buffer[rar->unp_offset + firstpart], &rar->lzss.window[0], length - firstpart); } else { memcpy(&rar->unp_buffer[rar->unp_offset], &rar->lzss.window[windowoffs], length); } } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Bad RAR file data"); return (ARCHIVE_FATAL); } rar->unp_offset += length; if (rar->unp_offset >= rar->unp_buffer_size) *buffer = rar->unp_buffer; else *buffer = NULL; return (ARCHIVE_OK); } static const void * rar_read_ahead(struct archive_read *a, size_t min, ssize_t *avail) { struct rar *rar = (struct rar *)(a->format->data); const void *h = __archive_read_ahead(a, min, avail); int ret; if (avail) { if (a->archive.read_data_is_posix_read && *avail > (ssize_t)a->archive.read_data_requested) *avail = a->archive.read_data_requested; if (*avail > rar->bytes_remaining) *avail = (ssize_t)rar->bytes_remaining; if (*avail < 0) return NULL; else if (*avail == 0 && rar->main_flags & MHD_VOLUME && rar->file_flags & FHD_SPLIT_AFTER) { + rar->filename_must_match = 1; ret = archive_read_format_rar_read_header(a, a->entry); if (ret == (ARCHIVE_EOF)) { rar->has_endarc_header = 1; ret = archive_read_format_rar_read_header(a, a->entry); } + rar->filename_must_match = 0; if (ret != (ARCHIVE_OK)) return NULL; return rar_read_ahead(a, min, avail); } } return h; } Index: head/contrib/libarchive/libarchive/archive_read_support_format_rar5.c =================================================================== --- head/contrib/libarchive/libarchive/archive_read_support_format_rar5.c (revision 342041) +++ head/contrib/libarchive/libarchive/archive_read_support_format_rar5.c (revision 342042) @@ -1,3478 +1,3485 @@ /*- * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" +#include "archive_endian.h" #ifdef HAVE_ERRNO_H #include #endif #include #ifdef HAVE_ZLIB_H #include /* crc32 */ #endif #include "archive.h" #ifndef HAVE_ZLIB_H #include "archive_crc32.h" #endif #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_ppmd7_private.h" #include "archive_entry_private.h" #ifdef HAVE_BLAKE2_H #include #else #include "archive_blake2.h" #endif /*#define CHECK_CRC_ON_SOLID_SKIP*/ /*#define DONT_FAIL_ON_CRC_ERROR*/ /*#define DEBUG*/ #define rar5_min(a, b) (((a) > (b)) ? (b) : (a)) #define rar5_max(a, b) (((a) > (b)) ? (a) : (b)) #define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*X))) #if defined DEBUG #define DEBUG_CODE if(1) #else #define DEBUG_CODE if(0) #endif /* Real RAR5 magic number is: * * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00 * "Rar!→•☺·\x00" * * It's stored in `rar5_signature` after XOR'ing it with 0xA1, because I don't * want to put this magic sequence in each binary that uses libarchive, so * applications that scan through the file for this marker won't trigger on * this "false" one. * * The array itself is decrypted in `rar5_init` function. */ static unsigned char rar5_signature[] = { 243, 192, 211, 128, 187, 166, 160, 161 }; static const ssize_t rar5_signature_size = sizeof(rar5_signature); /* static const size_t g_unpack_buf_chunk_size = 1024; */ static const size_t g_unpack_window_size = 0x20000; struct file_header { ssize_t bytes_remaining; ssize_t unpacked_size; int64_t last_offset; /* Used in sanity checks. */ int64_t last_size; /* Used in sanity checks. */ uint8_t solid : 1; /* Is this a solid stream? */ uint8_t service : 1; /* Is this file a service data? */ uint8_t eof : 1; /* Did we finish unpacking the file? */ /* Optional time fields. */ uint64_t e_mtime; uint64_t e_ctime; uint64_t e_atime; uint32_t e_unix_ns; /* Optional hash fields. */ uint32_t stored_crc32; uint32_t calculated_crc32; uint8_t blake2sp[32]; blake2sp_state b2state; char has_blake2; }; enum FILTER_TYPE { FILTER_DELTA = 0, /* Generic pattern. */ FILTER_E8 = 1, /* Intel x86 code. */ FILTER_E8E9 = 2, /* Intel x86 code. */ FILTER_ARM = 3, /* ARM code. */ FILTER_AUDIO = 4, /* Audio filter, not used in RARv5. */ FILTER_RGB = 5, /* Color palette, not used in RARv5. */ FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. */ FILTER_PPM = 7, /* Predictive pattern matching, not used in RARv5. */ FILTER_NONE = 8, }; struct filter_info { int type; int channels; int pos_r; int64_t block_start; ssize_t block_length; uint16_t width; }; struct data_ready { char used; const uint8_t* buf; size_t size; int64_t offset; }; struct cdeque { uint16_t beg_pos; uint16_t end_pos; uint16_t cap_mask; uint16_t size; size_t* arr; }; struct decode_table { uint32_t size; int32_t decode_len[16]; uint32_t decode_pos[16]; uint32_t quick_bits; uint8_t quick_len[1 << 10]; uint16_t quick_num[1 << 10]; uint16_t decode_num[306]; }; struct comp_state { /* Flag used to specify if unpacker needs to reinitialize the uncompression * context. */ uint8_t initialized : 1; /* Flag used when applying filters. */ uint8_t all_filters_applied : 1; /* Flag used to skip file context reinitialization, used when unpacker is * skipping through different multivolume archives. */ uint8_t switch_multivolume : 1; /* Flag used to specify if unpacker has processed the whole data block or * just a part of it. */ uint8_t block_parsing_finished : 1; int notused : 4; int flags; /* Uncompression flags. */ int method; /* Uncompression algorithm method. */ int version; /* Uncompression algorithm version. */ ssize_t window_size; /* Size of window_buf. */ uint8_t* window_buf; /* Circular buffer used during decompression. */ uint8_t* filtered_buf; /* Buffer used when applying filters. */ const uint8_t* block_buf; /* Buffer used when merging blocks. */ size_t window_mask; /* Convenience field; window_size - 1. */ int64_t write_ptr; /* This amount of data has been unpacked in the window buffer. */ int64_t last_write_ptr; /* This amount of data has been stored in the output file. */ int64_t last_unstore_ptr; /* Counter of bytes extracted during unstoring. This is separate from last_write_ptr because of how SERVICE base blocks are handled during skipping in solid multiarchive archives. */ int64_t solid_offset; /* Additional offset inside the window buffer, used in unpacking solid archives. */ ssize_t cur_block_size; /* Size of current data block. */ int last_len; /* Flag used in lzss decompression. */ /* Decode tables used during lzss uncompression. */ #define HUFF_BC 20 struct decode_table bd; /* huffman bit lengths */ #define HUFF_NC 306 struct decode_table ld; /* literals */ #define HUFF_DC 64 struct decode_table dd; /* distances */ #define HUFF_LDC 16 struct decode_table ldd; /* lower bits of distances */ #define HUFF_RC 44 struct decode_table rd; /* repeating distances */ #define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC) /* Circular deque for storing filters. */ struct cdeque filters; int64_t last_block_start; /* Used for sanity checking. */ ssize_t last_block_length; /* Used for sanity checking. */ /* Distance cache used during lzss uncompression. */ int dist_cache[4]; /* Data buffer stack. */ struct data_ready dready[2]; }; /* Bit reader state. */ struct bit_reader { int8_t bit_addr; /* Current bit pointer inside current byte. */ int in_addr; /* Current byte pointer. */ }; -/* RARv5 block header structure. */ +/* RARv5 block header structure. Use bf_* functions to get values from + * block_flags_u8 field. I.e. bf_byte_count, etc. */ struct compressed_block_header { - union { - struct { - uint8_t bit_size : 3; - uint8_t byte_count : 3; - uint8_t is_last_block : 1; - uint8_t is_table_present : 1; - } block_flags; - uint8_t block_flags_u8; - }; - + /* block_flags_u8 contain fields encoded in little-endian bitfield: + * + * - table present flag (shr 7, and 1), + * - last block flag (shr 6, and 1), + * - byte_count (shr 3, and 7), + * - bit_size (shr 0, and 7). + */ + uint8_t block_flags_u8; uint8_t block_cksum; }; /* RARv5 main header structure. */ struct main_header { /* Does the archive contain solid streams? */ uint8_t solid : 1; /* If this a multi-file archive? */ uint8_t volume : 1; uint8_t endarc : 1; uint8_t notused : 5; int vol_no; }; struct generic_header { uint8_t split_after : 1; uint8_t split_before : 1; uint8_t padding : 6; int size; int last_header_id; }; struct multivolume { int expected_vol_no; uint8_t* push_buf; }; /* Main context structure. */ struct rar5 { int header_initialized; /* Set to 1 if current file is positioned AFTER the magic value * of the archive file. This is used in header reading functions. */ int skipped_magic; /* Set to not zero if we're in skip mode (either by calling rar5_data_skip * function or when skipping over solid streams). Set to 0 when in * extraction mode. This is used during checksum calculation functions. */ int skip_mode; /* An offset to QuickOpen list. This is not supported by this unpacker, * because we're focusing on streaming interface. QuickOpen is designed * to make things quicker for non-stream interfaces, so it's not our * use case. */ uint64_t qlist_offset; /* An offset to additional Recovery data. This is not supported by this * unpacker. Recovery data are additional Reed-Solomon codes that could * be used to calculate bytes that are missing in archive or are * corrupted. */ uint64_t rr_offset; /* Various context variables grouped to different structures. */ struct generic_header generic; struct main_header main; struct comp_state cstate; struct file_header file; struct bit_reader bits; struct multivolume vol; /* The header of currently processed RARv5 block. Used in main * decompression logic loop. */ struct compressed_block_header last_block_hdr; }; /* Forward function declarations. */ static int verify_global_checksums(struct archive_read* a); static int rar5_read_data_skip(struct archive_read *a); static int push_data_ready(struct archive_read* a, struct rar5* rar, const uint8_t* buf, size_t size, int64_t offset); /* CDE_xxx = Circular Double Ended (Queue) return values. */ enum CDE_RETURN_VALUES { CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS, }; /* Clears the contents of this circular deque. */ static void cdeque_clear(struct cdeque* d) { d->size = 0; d->beg_pos = 0; d->end_pos = 0; } /* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32, * 64, 256, etc. When the user will add another item above current capacity, * the circular deque will overwrite the oldest entry. */ static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) { if(d == NULL || max_capacity_power_of_2 == 0) return CDE_PARAM; d->cap_mask = max_capacity_power_of_2 - 1; d->arr = NULL; if((max_capacity_power_of_2 & d->cap_mask) > 0) return CDE_PARAM; cdeque_clear(d); d->arr = malloc(sizeof(void*) * max_capacity_power_of_2); return d->arr ? CDE_OK : CDE_ALLOC; } /* Return the current size (not capacity) of circular deque `d`. */ static size_t cdeque_size(struct cdeque* d) { return d->size; } /* Returns the first element of current circular deque. Note that this function * doesn't perform any bounds checking. If you need bounds checking, use * `cdeque_front()` function instead. */ static void cdeque_front_fast(struct cdeque* d, void** value) { *value = (void*) d->arr[d->beg_pos]; } /* Returns the first element of current circular deque. This function * performs bounds checking. */ static int cdeque_front(struct cdeque* d, void** value) { if(d->size > 0) { cdeque_front_fast(d, value); return CDE_OK; } else return CDE_OUT_OF_BOUNDS; } /* Pushes a new element into the end of this circular deque object. If current * size will exceed capacity, the oldest element will be overwritten. */ static int cdeque_push_back(struct cdeque* d, void* item) { if(d == NULL) return CDE_PARAM; if(d->size == d->cap_mask + 1) return CDE_OUT_OF_BOUNDS; d->arr[d->end_pos] = (size_t) item; d->end_pos = (d->end_pos + 1) & d->cap_mask; d->size++; return CDE_OK; } /* Pops a front element of this circular deque object and returns its value. * This function doesn't perform any bounds checking. */ static void cdeque_pop_front_fast(struct cdeque* d, void** value) { *value = (void*) d->arr[d->beg_pos]; d->beg_pos = (d->beg_pos + 1) & d->cap_mask; d->size--; } /* Pops a front element of this circular deque object and returns its value. * This function performs bounds checking. */ static int cdeque_pop_front(struct cdeque* d, void** value) { if(!d || !value) return CDE_PARAM; if(d->size == 0) return CDE_OUT_OF_BOUNDS; cdeque_pop_front_fast(d, value); return CDE_OK; } /* Convenience function to cast filter_info** to void **. */ static void** cdeque_filter_p(struct filter_info** f) { return (void**) (size_t) f; } /* Convenience function to cast filter_info* to void *. */ static void* cdeque_filter(struct filter_info* f) { return (void**) (size_t) f; } /* Destroys this circular deque object. Deallocates the memory of the collection * buffer, but doesn't deallocate the memory of any pointer passed to this * deque as a value. */ static void cdeque_free(struct cdeque* d) { if(!d) return; if(!d->arr) return; free(d->arr); d->arr = NULL; d->beg_pos = -1; d->end_pos = -1; d->cap_mask = 0; } +static inline +uint8_t bf_bit_size(const struct compressed_block_header* hdr) { + return hdr->block_flags_u8 & 7; +} + +static inline +uint8_t bf_byte_count(const struct compressed_block_header* hdr) { + return (hdr->block_flags_u8 >> 3) & 7; +} + +static inline +uint8_t bf_is_table_present(const struct compressed_block_header* hdr) { + return (hdr->block_flags_u8 >> 7) & 1; +} + static inline struct rar5* get_context(struct archive_read* a) { return (struct rar5*) a->format->data; } -// TODO: make sure these functions return a little endian number - /* Convenience functions used by filter implementations. */ static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) { - uint32_t* dptr = (uint32_t*) &rar->cstate.window_buf[offset]; - // TODO: bswap if big endian - return *dptr; + return archive_le32dec(&rar->cstate.window_buf[offset]); } static void write_filter_data(struct rar5* rar, uint32_t offset, uint32_t value) { - uint32_t* dptr = (uint32_t*) &rar->cstate.filtered_buf[offset]; - // TODO: bswap if big endian - *dptr = value; + archive_le32enc(&rar->cstate.filtered_buf[offset], value); } static void circular_memcpy(uint8_t* dst, uint8_t* window, const int mask, int64_t start, int64_t end) { if((start & mask) > (end & mask)) { ssize_t len1 = mask + 1 - (start & mask); ssize_t len2 = end & mask; memcpy(dst, &window[start & mask], len1); memcpy(dst + len1, window, len2); } else { memcpy(dst, &window[start & mask], (size_t) (end - start)); } } /* Allocates a new filter descriptor and adds it to the filter array. */ static struct filter_info* add_new_filter(struct rar5* rar) { struct filter_info* f = (struct filter_info*) calloc(1, sizeof(struct filter_info)); if(!f) { return NULL; } cdeque_push_back(&rar->cstate.filters, cdeque_filter(f)); return f; } static int run_delta_filter(struct rar5* rar, struct filter_info* flt) { int i; ssize_t dest_pos, src_pos = 0; for(i = 0; i < flt->channels; i++) { uint8_t prev_byte = 0; for(dest_pos = i; dest_pos < flt->block_length; dest_pos += flt->channels) { uint8_t byte; byte = rar->cstate.window_buf[(rar->cstate.solid_offset + flt->block_start + src_pos) & rar->cstate.window_mask]; prev_byte -= byte; rar->cstate.filtered_buf[dest_pos] = prev_byte; src_pos++; } } return ARCHIVE_OK; } static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt, int extended) { const uint32_t file_size = 0x1000000; ssize_t i; circular_memcpy(rar->cstate.filtered_buf, rar->cstate.window_buf, rar->cstate.window_mask, rar->cstate.solid_offset + flt->block_start, rar->cstate.solid_offset + flt->block_start + flt->block_length); for(i = 0; i < flt->block_length - 4;) { uint8_t b = rar->cstate.window_buf[(rar->cstate.solid_offset + flt->block_start + i++) & rar->cstate.window_mask]; /* 0xE8 = x86's call (function call) * 0xE9 = x86's jmp (unconditional jump) */ if(b == 0xE8 || (extended && b == 0xE9)) { uint32_t addr; uint32_t offset = (i + flt->block_start) % file_size; addr = read_filter_data(rar, (rar->cstate.solid_offset + flt->block_start + i) & rar->cstate.window_mask); if(addr & 0x80000000) { if(((addr + offset) & 0x80000000) == 0) { write_filter_data(rar, i, addr + file_size); } } else { if((addr - file_size) & 0x80000000) { uint32_t naddr = addr - offset; write_filter_data(rar, i, naddr); } } i += 4; } } return ARCHIVE_OK; } static int run_arm_filter(struct rar5* rar, struct filter_info* flt) { ssize_t i = 0; uint32_t offset; const int mask = rar->cstate.window_mask; circular_memcpy(rar->cstate.filtered_buf, rar->cstate.window_buf, rar->cstate.window_mask, rar->cstate.solid_offset + flt->block_start, rar->cstate.solid_offset + flt->block_start + flt->block_length); for(i = 0; i < flt->block_length - 3; i += 4) { uint8_t* b = &rar->cstate.window_buf[(rar->cstate.solid_offset + flt->block_start + i) & mask]; if(b[3] == 0xEB) { /* 0xEB = ARM's BL (branch + link) instruction. */ offset = read_filter_data(rar, (rar->cstate.solid_offset + flt->block_start + i) & mask) & 0x00ffffff; offset -= (uint32_t) ((i + flt->block_start) / 4); offset = (offset & 0x00ffffff) | 0xeb000000; write_filter_data(rar, i, offset); } } return ARCHIVE_OK; } static int run_filter(struct archive_read* a, struct filter_info* flt) { int ret; struct rar5* rar = get_context(a); if(rar->cstate.filtered_buf) free(rar->cstate.filtered_buf); rar->cstate.filtered_buf = malloc(flt->block_length); if(!rar->cstate.filtered_buf) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for " "filter data."); return ARCHIVE_FATAL; } switch(flt->type) { case FILTER_DELTA: ret = run_delta_filter(rar, flt); break; case FILTER_E8: /* fallthrough */ case FILTER_E8E9: ret = run_e8e9_filter(rar, flt, flt->type == FILTER_E8E9); break; case FILTER_ARM: ret = run_arm_filter(rar, flt); break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported filter type: 0x%02x", flt->type); return ARCHIVE_FATAL; } if(ret != ARCHIVE_OK) { /* Filter has failed. */ return ret; } if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf, flt->block_length, rar->cstate.last_write_ptr)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Stack overflow when submitting unpacked data"); return ARCHIVE_FATAL; } rar->cstate.last_write_ptr += flt->block_length; return ARCHIVE_OK; } /* The `push_data` function submits the selected data range to the user. * Next call of `use_data` will use the pointer, size and offset arguments * that are specified here. These arguments are pushed to the FIFO stack here, * and popped from the stack by the `use_data` function. */ static void push_data(struct archive_read* a, struct rar5* rar, const uint8_t* buf, int64_t idx_begin, int64_t idx_end) { const int wmask = rar->cstate.window_mask; const ssize_t solid_write_ptr = (rar->cstate.solid_offset + rar->cstate.last_write_ptr) & wmask; idx_begin += rar->cstate.solid_offset; idx_end += rar->cstate.solid_offset; /* Check if our unpacked data is wrapped inside the window circular buffer. * If it's not wrapped, it can be copied out by using a single memcpy, * but when it's wrapped, we need to copy the first part with one * memcpy, and the second part with another memcpy. */ if((idx_begin & wmask) > (idx_end & wmask)) { /* The data is wrapped (begin offset sis bigger than end offset). */ const ssize_t frag1_size = rar->cstate.window_size - (idx_begin & wmask); const ssize_t frag2_size = idx_end & wmask; /* Copy the first part of the buffer first. */ push_data_ready(a, rar, buf + solid_write_ptr, frag1_size, rar->cstate.last_write_ptr); /* Copy the second part of the buffer. */ push_data_ready(a, rar, buf, frag2_size, rar->cstate.last_write_ptr + frag1_size); rar->cstate.last_write_ptr += frag1_size + frag2_size; } else { /* Data is not wrapped, so we can just use one call to copy the * data. */ push_data_ready(a, rar, buf + solid_write_ptr, (idx_end - idx_begin) & wmask, rar->cstate.last_write_ptr); rar->cstate.last_write_ptr += idx_end - idx_begin; } } /* Convenience function that submits the data to the user. It uses the * unpack window buffer as a source location. */ static void push_window_data(struct archive_read* a, struct rar5* rar, int64_t idx_begin, int64_t idx_end) { push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end); } static int apply_filters(struct archive_read* a) { struct filter_info* flt; struct rar5* rar = get_context(a); int ret; rar->cstate.all_filters_applied = 0; /* Get the first filter that can be applied to our data. The data needs to * be fully unpacked before the filter can be run. */ if(CDE_OK == cdeque_front(&rar->cstate.filters, cdeque_filter_p(&flt))) { /* Check if our unpacked data fully covers this filter's range. */ if(rar->cstate.write_ptr > flt->block_start && rar->cstate.write_ptr >= flt->block_start + flt->block_length) { /* Check if we have some data pending to be written right before * the filter's start offset. */ if(rar->cstate.last_write_ptr == flt->block_start) { /* Run the filter specified by descriptor `flt`. */ ret = run_filter(a, flt); if(ret != ARCHIVE_OK) { /* Filter failure, return error. */ return ret; } /* Filter descriptor won't be needed anymore after it's used, * so remove it from the filter list and free its memory. */ (void) cdeque_pop_front(&rar->cstate.filters, cdeque_filter_p(&flt)); free(flt); } else { /* We can't run filters yet, dump the memory right before the * filter. */ push_window_data(a, rar, rar->cstate.last_write_ptr, flt->block_start); } /* Return 'filter applied or not needed' state to the caller. */ return ARCHIVE_RETRY; } } rar->cstate.all_filters_applied = 1; return ARCHIVE_OK; } static void dist_cache_push(struct rar5* rar, int value) { int* q = rar->cstate.dist_cache; q[3] = q[2]; q[2] = q[1]; q[1] = q[0]; q[0] = value; } static int dist_cache_touch(struct rar5* rar, int idx) { int* q = rar->cstate.dist_cache; int i, dist = q[idx]; for(i = idx; i > 0; i--) q[i] = q[i - 1]; q[0] = dist; return dist; } static void free_filters(struct rar5* rar) { struct cdeque* d = &rar->cstate.filters; /* Free any remaining filters. All filters should be naturally consumed by * the unpacking function, so remaining filters after unpacking normally * mean that unpacking wasn't successful. But still of course we shouldn't * leak memory in such case. */ /* cdeque_size() is a fast operation, so we can use it as a loop * expression. */ while(cdeque_size(d) > 0) { struct filter_info* f = NULL; /* Pop_front will also decrease the collection's size. */ if(CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f)) && f != NULL) free(f); } cdeque_clear(d); /* Also clear out the variables needed for sanity checking. */ rar->cstate.last_block_start = 0; rar->cstate.last_block_length = 0; } static void reset_file_context(struct rar5* rar) { memset(&rar->file, 0, sizeof(rar->file)); blake2sp_init(&rar->file.b2state, 32); if(rar->main.solid) { rar->cstate.solid_offset += rar->cstate.write_ptr; } else { rar->cstate.solid_offset = 0; } rar->cstate.write_ptr = 0; rar->cstate.last_write_ptr = 0; rar->cstate.last_unstore_ptr = 0; free_filters(rar); } static inline int get_archive_read(struct archive* a, struct archive_read** ar) { *ar = (struct archive_read*) a; archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_rar5"); return ARCHIVE_OK; } static int read_ahead(struct archive_read* a, size_t how_many, const uint8_t** ptr) { if(!ptr) return 0; ssize_t avail = -1; *ptr = __archive_read_ahead(a, how_many, &avail); if(*ptr == NULL) { return 0; } return 1; } static int consume(struct archive_read* a, int64_t how_many) { int ret; ret = how_many == __archive_read_consume(a, how_many) ? ARCHIVE_OK : ARCHIVE_FATAL; return ret; } /** * Read a RAR5 variable sized numeric value. This value will be stored in * `pvalue`. The `pvalue_len` argument points to a variable that will receive * the byte count that was consumed in order to decode the `pvalue` value, plus * one. * * pvalue_len is optional and can be NULL. * * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume * the number of bytes that `pvalue_len` value contains. If the `pvalue_len` * is NULL, this consuming operation is done automatically. * * Returns 1 if *pvalue was successfully read. * Returns 0 if there was an error. In this case, *pvalue contains an * invalid value. */ static int read_var(struct archive_read* a, uint64_t* pvalue, uint64_t* pvalue_len) { uint64_t result = 0; size_t shift, i; const uint8_t* p; uint8_t b; /* We will read maximum of 8 bytes. We don't have to handle the situation * to read the RAR5 variable-sized value stored at the end of the file, * because such situation will never happen. */ if(!read_ahead(a, 8, &p)) return 0; for(shift = 0, i = 0; i < 8; i++, shift += 7) { b = p[i]; /* Strip the MSB from the input byte and add the resulting number * to the `result`. */ result += (b & 0x7F) << shift; /* MSB set to 1 means we need to continue decoding process. MSB set * to 0 means we're done. * * This conditional checks for the second case. */ if((b & 0x80) == 0) { if(pvalue) { *pvalue = result; } /* If the caller has passed the `pvalue_len` pointer, store the * number of consumed bytes in it and do NOT consume those bytes, * since the caller has all the information it needs to perform * the consuming process itself. */ if(pvalue_len) { *pvalue_len = 1 + i; } else { /* If the caller did not provide the `pvalue_len` pointer, * it will not have the possibility to advance the file * pointer, because it will not know how many bytes it needs * to consume. This is why we handle such situation here * automatically. */ if(ARCHIVE_OK != consume(a, 1 + i)) { return 0; } } /* End of decoding process, return success. */ return 1; } } /* The decoded value takes the maximum number of 8 bytes. It's a maximum * number of bytes, so end decoding process here even if the first bit * of last byte is 1. */ if(pvalue) { *pvalue = result; } if(pvalue_len) { *pvalue_len = 9; } else { if(ARCHIVE_OK != consume(a, 9)) { return 0; } } return 1; } static int read_var_sized(struct archive_read* a, size_t* pvalue, size_t* pvalue_len) { uint64_t v; uint64_t v_size = 0; const int ret = pvalue_len ? read_var(a, &v, &v_size) : read_var(a, &v, NULL); if(ret == 1 && pvalue) { *pvalue = (size_t) v; } if(pvalue_len) { /* Possible data truncation should be safe. */ *pvalue_len = (size_t) v_size; } return ret; } static int read_bits_32(struct rar5* rar, const uint8_t* p, uint32_t* value) { uint32_t bits = p[rar->bits.in_addr] << 24; bits |= p[rar->bits.in_addr + 1] << 16; bits |= p[rar->bits.in_addr + 2] << 8; bits |= p[rar->bits.in_addr + 3]; bits <<= rar->bits.bit_addr; bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr); *value = bits; return ARCHIVE_OK; } static int read_bits_16(struct rar5* rar, const uint8_t* p, uint16_t* value) { int bits = (int) p[rar->bits.in_addr] << 16; bits |= (int) p[rar->bits.in_addr + 1] << 8; bits |= (int) p[rar->bits.in_addr + 2]; bits >>= (8 - rar->bits.bit_addr); *value = bits & 0xffff; return ARCHIVE_OK; } static void skip_bits(struct rar5* rar, int bits) { const int new_bits = rar->bits.bit_addr + bits; rar->bits.in_addr += new_bits >> 3; rar->bits.bit_addr = new_bits & 7; } /* n = up to 16 */ static int read_consume_bits(struct rar5* rar, const uint8_t* p, int n, int* value) { uint16_t v; int ret, num; if(n == 0 || n > 16) { /* This is a programmer error and should never happen in runtime. */ return ARCHIVE_FATAL; } ret = read_bits_16(rar, p, &v); if(ret != ARCHIVE_OK) return ret; num = (int) v; num >>= 16 - n; skip_bits(rar, n); if(value) *value = num; return ARCHIVE_OK; } static int read_u32(struct archive_read* a, uint32_t* pvalue) { const uint8_t* p; if(!read_ahead(a, 4, &p)) return 0; - *pvalue = *(const uint32_t*)p; - + *pvalue = archive_le32dec(p); return ARCHIVE_OK == consume(a, 4) ? 1 : 0; } static int read_u64(struct archive_read* a, uint64_t* pvalue) { const uint8_t* p; if(!read_ahead(a, 8, &p)) return 0; - *pvalue = *(const uint64_t*)p; - + *pvalue = archive_le64dec(p); return ARCHIVE_OK == consume(a, 8) ? 1 : 0; } static int bid_standard(struct archive_read* a) { const uint8_t* p; if(!read_ahead(a, rar5_signature_size, &p)) return -1; if(!memcmp(rar5_signature, p, rar5_signature_size)) return 30; return -1; } static int rar5_bid(struct archive_read* a, int best_bid) { int my_bid; if(best_bid > 30) return -1; my_bid = bid_standard(a); if(my_bid > -1) { return my_bid; } return -1; } static int rar5_options(struct archive_read *a, const char *key, const char *val) { (void) a; (void) key; (void) val; /* No options supported in this version. Return the ARCHIVE_WARN code to * signal the options supervisor that the unpacker didn't handle setting * this option. */ return ARCHIVE_WARN; } static void init_header(struct archive_read* a) { a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5; a->archive.archive_format_name = "RAR5"; } enum HEADER_FLAGS { HFL_EXTRA_DATA = 0x0001, HFL_DATA = 0x0002, HFL_SKIP_IF_UNKNOWN = 0x0004, HFL_SPLIT_BEFORE = 0x0008, HFL_SPLIT_AFTER = 0x0010, HFL_CHILD = 0x0020, HFL_INHERITED = 0x0040 }; static int process_main_locator_extra_block(struct archive_read* a, struct rar5* rar) { uint64_t locator_flags; if(!read_var(a, &locator_flags, NULL)) { return ARCHIVE_EOF; } enum LOCATOR_FLAGS { QLIST = 0x01, RECOVERY = 0x02, }; if(locator_flags & QLIST) { if(!read_var(a, &rar->qlist_offset, NULL)) { return ARCHIVE_EOF; } /* qlist is not used */ } if(locator_flags & RECOVERY) { if(!read_var(a, &rar->rr_offset, NULL)) { return ARCHIVE_EOF; } /* rr is not used */ } return ARCHIVE_OK; } static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar, ssize_t* extra_data_size) { size_t hash_type; size_t value_len; if(!read_var_sized(a, &hash_type, &value_len)) return ARCHIVE_EOF; *extra_data_size -= value_len; if(ARCHIVE_OK != consume(a, value_len)) { return ARCHIVE_EOF; } enum HASH_TYPE { BLAKE2sp = 0x00 }; /* The file uses BLAKE2sp checksum algorithm instead of plain old * CRC32. */ if(hash_type == BLAKE2sp) { const uint8_t* p; const int hash_size = sizeof(rar->file.blake2sp); if(!read_ahead(a, hash_size, &p)) return ARCHIVE_EOF; rar->file.has_blake2 = 1; memcpy(&rar->file.blake2sp, p, hash_size); if(ARCHIVE_OK != consume(a, hash_size)) { return ARCHIVE_EOF; } *extra_data_size -= hash_size; } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported hash type (0x%02x)", (int) hash_type); return ARCHIVE_FATAL; } return ARCHIVE_OK; } static uint64_t time_win_to_unix(uint64_t win_time) { const size_t ns_in_sec = 10000000; const uint64_t sec_to_unix = 11644473600LL; return win_time / ns_in_sec - sec_to_unix; } static int parse_htime_item(struct archive_read* a, char unix_time, uint64_t* where, ssize_t* extra_data_size) { if(unix_time) { uint32_t time_val; if(!read_u32(a, &time_val)) return ARCHIVE_EOF; *extra_data_size -= 4; *where = (uint64_t) time_val; } else { uint64_t windows_time; if(!read_u64(a, &windows_time)) return ARCHIVE_EOF; *where = time_win_to_unix(windows_time); *extra_data_size -= 8; } return ARCHIVE_OK; } static int parse_file_extra_htime(struct archive_read* a, struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size) { char unix_time = 0; size_t flags; size_t value_len; enum HTIME_FLAGS { IS_UNIX = 0x01, HAS_MTIME = 0x02, HAS_CTIME = 0x04, HAS_ATIME = 0x08, HAS_UNIX_NS = 0x10, }; if(!read_var_sized(a, &flags, &value_len)) return ARCHIVE_EOF; *extra_data_size -= value_len; if(ARCHIVE_OK != consume(a, value_len)) { return ARCHIVE_EOF; } unix_time = flags & IS_UNIX; if(flags & HAS_MTIME) { parse_htime_item(a, unix_time, &rar->file.e_mtime, extra_data_size); archive_entry_set_mtime(e, rar->file.e_mtime, 0); } if(flags & HAS_CTIME) { parse_htime_item(a, unix_time, &rar->file.e_ctime, extra_data_size); archive_entry_set_ctime(e, rar->file.e_ctime, 0); } if(flags & HAS_ATIME) { parse_htime_item(a, unix_time, &rar->file.e_atime, extra_data_size); archive_entry_set_atime(e, rar->file.e_atime, 0); } if(flags & HAS_UNIX_NS) { if(!read_u32(a, &rar->file.e_unix_ns)) return ARCHIVE_EOF; *extra_data_size -= 4; } return ARCHIVE_OK; } static int process_head_file_extra(struct archive_read* a, struct archive_entry* e, struct rar5* rar, ssize_t extra_data_size) { size_t extra_field_size; size_t extra_field_id = 0; int ret = ARCHIVE_FATAL; size_t var_size; enum EXTRA { CRYPT = 0x01, HASH = 0x02, HTIME = 0x03, VERSION_ = 0x04, REDIR = 0x05, UOWNER = 0x06, SUBDATA = 0x07 }; while(extra_data_size > 0) { if(!read_var_sized(a, &extra_field_size, &var_size)) return ARCHIVE_EOF; extra_data_size -= var_size; if(ARCHIVE_OK != consume(a, var_size)) { return ARCHIVE_EOF; } if(!read_var_sized(a, &extra_field_id, &var_size)) return ARCHIVE_EOF; extra_data_size -= var_size; if(ARCHIVE_OK != consume(a, var_size)) { return ARCHIVE_EOF; } switch(extra_field_id) { case HASH: ret = parse_file_extra_hash(a, rar, &extra_data_size); break; case HTIME: ret = parse_file_extra_htime(a, e, rar, &extra_data_size); break; case CRYPT: /* fallthrough */ case VERSION_: /* fallthrough */ case REDIR: /* fallthrough */ case UOWNER: /* fallthrough */ case SUBDATA: /* fallthrough */ default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unknown extra field in file/service block: 0x%02x", (int) extra_field_id); return ARCHIVE_FATAL; } } if(ret != ARCHIVE_OK) { /* Attribute not implemented. */ return ret; } return ARCHIVE_OK; } static int process_head_file(struct archive_read* a, struct rar5* rar, struct archive_entry* entry, size_t block_flags) { ssize_t extra_data_size = 0; size_t data_size = 0; size_t file_flags = 0; size_t file_attr = 0; size_t compression_info = 0; size_t host_os = 0; size_t name_size = 0; uint64_t unpacked_size; uint32_t mtime = 0, crc = 0; int c_method = 0, c_version = 0, is_dir; char name_utf8_buf[2048 * 4]; const uint8_t* p; memset(entry, 0, sizeof(struct archive_entry)); /* Do not reset file context if we're switching archives. */ if(!rar->cstate.switch_multivolume) { reset_file_context(rar); } if(block_flags & HFL_EXTRA_DATA) { size_t edata_size = 0; if(!read_var_sized(a, &edata_size, NULL)) return ARCHIVE_EOF; /* Intentional type cast from unsigned to signed. */ extra_data_size = (ssize_t) edata_size; } if(block_flags & HFL_DATA) { if(!read_var_sized(a, &data_size, NULL)) return ARCHIVE_EOF; rar->file.bytes_remaining = data_size; } else { rar->file.bytes_remaining = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "no data found in file/service block"); return ARCHIVE_FATAL; } enum FILE_FLAGS { DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004, UNKNOWN_UNPACKED_SIZE = 0x0008, }; enum COMP_INFO_FLAGS { SOLID = 0x0040, }; if(!read_var_sized(a, &file_flags, NULL)) return ARCHIVE_EOF; if(!read_var(a, &unpacked_size, NULL)) return ARCHIVE_EOF; if(file_flags & UNKNOWN_UNPACKED_SIZE) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Files with unknown unpacked size are not supported"); return ARCHIVE_FATAL; } is_dir = (int) (file_flags & DIRECTORY); if(!read_var_sized(a, &file_attr, NULL)) return ARCHIVE_EOF; if(file_flags & UTIME) { if(!read_u32(a, &mtime)) return ARCHIVE_EOF; } if(file_flags & CRC32) { if(!read_u32(a, &crc)) return ARCHIVE_EOF; } if(!read_var_sized(a, &compression_info, NULL)) return ARCHIVE_EOF; c_method = (int) (compression_info >> 7) & 0x7; c_version = (int) (compression_info & 0x3f); rar->cstate.window_size = is_dir ? 0 : g_unpack_window_size << ((compression_info >> 10) & 15); rar->cstate.method = c_method; rar->cstate.version = c_version + 50; rar->file.solid = (compression_info & SOLID) > 0; rar->file.service = 0; if(!read_var_sized(a, &host_os, NULL)) return ARCHIVE_EOF; enum HOST_OS { HOST_WINDOWS = 0, HOST_UNIX = 1, }; if(host_os == HOST_WINDOWS) { /* Host OS is Windows */ unsigned short mode = 0660; if(is_dir) mode |= AE_IFDIR; else mode |= AE_IFREG; archive_entry_set_mode(entry, mode); } else if(host_os == HOST_UNIX) { /* Host OS is Unix */ archive_entry_set_mode(entry, (unsigned short) file_attr); } else { /* Unknown host OS */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported Host OS: 0x%02x", (int) host_os); return ARCHIVE_FATAL; } if(!read_var_sized(a, &name_size, NULL)) return ARCHIVE_EOF; if(!read_ahead(a, name_size, &p)) return ARCHIVE_EOF; if(name_size > 2047) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Filename is too long"); return ARCHIVE_FATAL; } if(name_size == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "No filename specified"); return ARCHIVE_FATAL; } memcpy(name_utf8_buf, p, name_size); name_utf8_buf[name_size] = 0; if(ARCHIVE_OK != consume(a, name_size)) { return ARCHIVE_EOF; } if(extra_data_size > 0) { int ret = process_head_file_extra(a, entry, rar, extra_data_size); /* Sanity check. */ if(extra_data_size < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "File extra data size is not zero"); return ARCHIVE_FATAL; } if(ret != ARCHIVE_OK) return ret; } if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) { rar->file.unpacked_size = (ssize_t) unpacked_size; archive_entry_set_size(entry, unpacked_size); } if(file_flags & UTIME) { archive_entry_set_mtime(entry, (time_t) mtime, 0); } if(file_flags & CRC32) { rar->file.stored_crc32 = crc; } archive_entry_update_pathname_utf8(entry, name_utf8_buf); if(!rar->cstate.switch_multivolume) { /* Do not reinitialize unpacking state if we're switching archives. */ rar->cstate.block_parsing_finished = 1; rar->cstate.all_filters_applied = 1; rar->cstate.initialized = 0; } if(rar->generic.split_before > 0) { /* If now we're standing on a header that has a 'split before' mark, * it means we're standing on a 'continuation' file header. Signal * the caller that if it wants to move to another file, it must call * rar5_read_header() function again. */ return ARCHIVE_RETRY; } else { return ARCHIVE_OK; } } static int process_head_service(struct archive_read* a, struct rar5* rar, struct archive_entry* entry, size_t block_flags) { /* Process this SERVICE block the same way as FILE blocks. */ int ret = process_head_file(a, rar, entry, block_flags); if(ret != ARCHIVE_OK) return ret; rar->file.service = 1; /* But skip the data part automatically. It's no use for the user anyway. * It contains only service data, not even needed to properly unpack the * file. */ ret = rar5_read_data_skip(a); if(ret != ARCHIVE_OK) return ret; /* After skipping, try parsing another block automatically. */ return ARCHIVE_RETRY; } static int process_head_main(struct archive_read* a, struct rar5* rar, struct archive_entry* entry, size_t block_flags) { (void) entry; int ret; size_t extra_data_size = 0; size_t extra_field_size = 0; size_t extra_field_id = 0; size_t archive_flags = 0; if(block_flags & HFL_EXTRA_DATA) { if(!read_var_sized(a, &extra_data_size, NULL)) return ARCHIVE_EOF; } else { extra_data_size = 0; } if(!read_var_sized(a, &archive_flags, NULL)) { return ARCHIVE_EOF; } enum MAIN_FLAGS { VOLUME = 0x0001, /* multi-volume archive */ VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't have it */ SOLID = 0x0004, /* solid archive */ PROTECT = 0x0008, /* contains Recovery info */ LOCK = 0x0010, /* readonly flag, not used */ }; rar->main.volume = (archive_flags & VOLUME) > 0; rar->main.solid = (archive_flags & SOLID) > 0; if(archive_flags & VOLUME_NUMBER) { size_t v = 0; if(!read_var_sized(a, &v, NULL)) { return ARCHIVE_EOF; } rar->main.vol_no = (int) v; } else { rar->main.vol_no = 0; } if(rar->vol.expected_vol_no > 0 && rar->main.vol_no != rar->vol.expected_vol_no) { /* Returning EOF instead of FATAL because of strange libarchive * behavior. When opening multiple files via * archive_read_open_filenames(), after reading up the whole last file, * the __archive_read_ahead function wraps up to the first archive * instead of returning EOF. */ return ARCHIVE_EOF; } if(extra_data_size == 0) { /* Early return. */ return ARCHIVE_OK; } if(!read_var_sized(a, &extra_field_size, NULL)) { return ARCHIVE_EOF; } if(!read_var_sized(a, &extra_field_id, NULL)) { return ARCHIVE_EOF; } if(extra_field_size == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid extra field size"); return ARCHIVE_FATAL; } enum MAIN_EXTRA { // Just one attribute here. LOCATOR = 0x01, }; switch(extra_field_id) { case LOCATOR: ret = process_main_locator_extra_block(a, rar); if(ret != ARCHIVE_OK) { /* Error while parsing main locator extra block. */ return ret; } break; default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported extra type (0x%02x)", (int) extra_field_id); return ARCHIVE_FATAL; } return ARCHIVE_OK; } static int scan_for_signature(struct archive_read* a); /* Base block processing function. A 'base block' is a RARv5 header block * that tells the reader what kind of data is stored inside the block. * * From the birds-eye view a RAR file looks file this: * * ... * * There are a few types of base blocks. Those types are specified inside * the 'switch' statement in this function. For example purposes, I'll write * how a standard RARv5 file could look like here: * *
* * The structure above could describe an archive file with 3 files in it, * one service "QuickOpen" block (that is ignored by this parser), and an * end of file base block marker. * * If the file is stored in multiple archive files ("multiarchive"), it might * look like this: * * .part01.rar:
* .part02.rar:
* .part03.rar:
* * This example could describe 3 RAR files that contain ONE archived file. * Or it could describe 3 RAR files that contain 3 different files. Or 3 * RAR files than contain 2 files. It all depends what metadata is stored in * the headers of blocks. * * Each block contains info about its size, the name of the file it's * storing inside, and whether this FILE block is a continuation block of * previous archive ('split before'), and is this FILE block should be * continued in another archive ('split after'). By parsing the 'split before' * and 'split after' flags, we're able to tell if multiple base blocks * are describing one file, or multiple files (with the same filename, for * example). * * One thing to note is that if we're parsing the first block, and * we see 'split after' flag, then we need to jump over to another * block to be able to decompress rest of the data. To do this, we need * to skip the block, then switch to another file, then skip the * block,
block, and then we're standing on the proper * block. */ static int process_base_block(struct archive_read* a, struct archive_entry* entry) { struct rar5* rar = get_context(a); uint32_t hdr_crc, computed_crc; size_t raw_hdr_size = 0, hdr_size_len, hdr_size; size_t header_id = 0; size_t header_flags = 0; const uint8_t* p; int ret; /* Skip any unprocessed data for this file. */ if(rar->file.bytes_remaining) { ret = rar5_read_data_skip(a); if(ret != ARCHIVE_OK) { return ret; } } /* Read the expected CRC32 checksum. */ if(!read_u32(a, &hdr_crc)) { return ARCHIVE_EOF; } /* Read header size. */ if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) { return ARCHIVE_EOF; } /* Sanity check, maximum header size for RAR5 is 2MB. */ if(raw_hdr_size > (2 * 1024 * 1024)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Base block header is too large"); return ARCHIVE_FATAL; } hdr_size = raw_hdr_size + hdr_size_len; /* Read the whole header data into memory, maximum memory use here is * 2MB. */ if(!read_ahead(a, hdr_size, &p)) { return ARCHIVE_EOF; } /* Verify the CRC32 of the header data. */ computed_crc = (uint32_t) crc32(0, p, (int) hdr_size); if(computed_crc != hdr_crc) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Header CRC error"); return ARCHIVE_FATAL; } /* If the checksum is OK, we proceed with parsing. */ if(ARCHIVE_OK != consume(a, hdr_size_len)) { return ARCHIVE_EOF; } if(!read_var_sized(a, &header_id, NULL)) return ARCHIVE_EOF; if(!read_var_sized(a, &header_flags, NULL)) return ARCHIVE_EOF; rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0; rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0; rar->generic.size = hdr_size; rar->generic.last_header_id = header_id; rar->main.endarc = 0; /* Those are possible header ids in RARv5. */ enum HEADER_TYPE { HEAD_MARK = 0x00, HEAD_MAIN = 0x01, HEAD_FILE = 0x02, HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05, HEAD_UNKNOWN = 0xff, }; switch(header_id) { case HEAD_MAIN: ret = process_head_main(a, rar, entry, header_flags); /* Main header doesn't have any files in it, so it's pointless * to return to the caller. Retry to next header, which should be * HEAD_FILE/HEAD_SERVICE. */ if(ret == ARCHIVE_OK) return ARCHIVE_RETRY; return ret; case HEAD_SERVICE: ret = process_head_service(a, rar, entry, header_flags); return ret; case HEAD_FILE: ret = process_head_file(a, rar, entry, header_flags); return ret; case HEAD_CRYPT: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Encryption is not supported"); return ARCHIVE_FATAL; case HEAD_ENDARC: rar->main.endarc = 1; /* After encountering an end of file marker, we need to take * into consideration if this archive is continued in another * file (i.e. is it part01.rar: is there a part02.rar?) */ if(rar->main.volume) { /* In case there is part02.rar, position the read pointer * in a proper place, so we can resume parsing. */ ret = scan_for_signature(a); if(ret == ARCHIVE_FATAL) { return ARCHIVE_EOF; } else { rar->vol.expected_vol_no = rar->main.vol_no + 1; return ARCHIVE_OK; } } else { return ARCHIVE_EOF; } case HEAD_MARK: return ARCHIVE_EOF; default: if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Header type error"); return ARCHIVE_FATAL; } else { /* If the block is marked as 'skip if unknown', do as the flag * says: skip the block instead on failing on it. */ return ARCHIVE_RETRY; } } #if !defined WIN32 // Not reached. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Internal unpacker error"); return ARCHIVE_FATAL; #endif } static int skip_base_block(struct archive_read* a) { int ret; struct rar5* rar = get_context(a); struct archive_entry entry; ret = process_base_block(a, &entry); if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0) return ARCHIVE_OK; if(ret == ARCHIVE_OK) return ARCHIVE_RETRY; else return ret; } static int rar5_read_header(struct archive_read *a, struct archive_entry *entry) { struct rar5* rar = get_context(a); int ret; if(rar->header_initialized == 0) { init_header(a); rar->header_initialized = 1; } if(rar->skipped_magic == 0) { if(ARCHIVE_OK != consume(a, rar5_signature_size)) { return ARCHIVE_EOF; } rar->skipped_magic = 1; } do { ret = process_base_block(a, entry); } while(ret == ARCHIVE_RETRY || (rar->main.endarc > 0 && ret == ARCHIVE_OK)); return ret; } static void init_unpack(struct rar5* rar) { rar->file.calculated_crc32 = 0; rar->cstate.window_mask = rar->cstate.window_size - 1; if(rar->cstate.window_buf) free(rar->cstate.window_buf); if(rar->cstate.filtered_buf) free(rar->cstate.filtered_buf); rar->cstate.window_buf = calloc(1, rar->cstate.window_size); rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size); rar->cstate.write_ptr = 0; rar->cstate.last_write_ptr = 0; memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd)); memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld)); memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd)); memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd)); memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd)); } static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) { int verify_crc; if(rar->skip_mode) { #if defined CHECK_CRC_ON_SOLID_SKIP verify_crc = 1; #else verify_crc = 0; #endif } else verify_crc = 1; if(verify_crc) { /* Don't update CRC32 if the file doesn't have the `stored_crc32` info filled in. */ if(rar->file.stored_crc32 > 0) { rar->file.calculated_crc32 = crc32(rar->file.calculated_crc32, p, to_read); } /* Check if the file uses an optional BLAKE2sp checksum algorithm. */ if(rar->file.has_blake2 > 0) { /* Return value of the `update` function is always 0, so we can * explicitly ignore it here. */ (void) blake2sp_update(&rar->file.b2state, p, to_read); } } } static int create_decode_tables(uint8_t* bit_length, struct decode_table* table, int size) { int code, upper_limit = 0, i, lc[16]; uint32_t decode_pos_clone[rar5_countof(table->decode_pos)]; ssize_t cur_len, quick_data_size; memset(&lc, 0, sizeof(lc)); memset(table->decode_num, 0, sizeof(table->decode_num)); table->size = size; table->quick_bits = size == HUFF_NC ? 10 : 7; for(i = 0; i < size; i++) { lc[bit_length[i] & 15]++; } lc[0] = 0; table->decode_pos[0] = 0; table->decode_len[0] = 0; for(i = 1; i < 16; i++) { upper_limit += lc[i]; table->decode_len[i] = upper_limit << (16 - i); table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1]; upper_limit <<= 1; } memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone)); for(i = 0; i < size; i++) { uint8_t clen = bit_length[i] & 15; if(clen > 0) { int last_pos = decode_pos_clone[clen]; table->decode_num[last_pos] = i; decode_pos_clone[clen]++; } } quick_data_size = 1 << table->quick_bits; cur_len = 1; for(code = 0; code < quick_data_size; code++) { int bit_field = code << (16 - table->quick_bits); int dist, pos; while(cur_len < rar5_countof(table->decode_len) && bit_field >= table->decode_len[cur_len]) { cur_len++; } table->quick_len[code] = (uint8_t) cur_len; dist = bit_field - table->decode_len[cur_len - 1]; dist >>= (16 - cur_len); - pos = table->decode_pos[cur_len] + dist; + pos = table->decode_pos[cur_len & 15] + dist; if(cur_len < rar5_countof(table->decode_pos) && pos < size) { table->quick_num[code] = table->decode_num[pos]; } else { table->quick_num[code] = 0; } } return ARCHIVE_OK; } static int decode_number(struct archive_read* a, struct decode_table* table, const uint8_t* p, uint16_t* num) { int i, bits, dist; uint16_t bitfield; uint32_t pos; struct rar5* rar = get_context(a); if(ARCHIVE_OK != read_bits_16(rar, p, &bitfield)) { return ARCHIVE_EOF; } bitfield &= 0xfffe; if(bitfield < table->decode_len[table->quick_bits]) { int code = bitfield >> (16 - table->quick_bits); skip_bits(rar, table->quick_len[code]); *num = table->quick_num[code]; return ARCHIVE_OK; } bits = 15; for(i = table->quick_bits + 1; i < 15; i++) { if(bitfield < table->decode_len[i]) { bits = i; break; } } skip_bits(rar, bits); dist = bitfield - table->decode_len[bits - 1]; dist >>= (16 - bits); pos = table->decode_pos[bits] + dist; if(pos >= table->size) pos = 0; *num = table->decode_num[pos]; return ARCHIVE_OK; } /* Reads and parses Huffman tables from the beginning of the block. */ static int parse_tables(struct archive_read* a, struct rar5* rar, const uint8_t* p) { int ret, value, i, w, idx = 0; uint8_t bit_length[HUFF_BC], table[HUFF_TABLE_SIZE], nibble_mask = 0xF0, nibble_shift = 4; enum { ESCAPE = 15 }; /* The data for table generation is compressed using a simple RLE-like * algorithm when storing zeroes, so we need to unpack it first. */ for(w = 0, i = 0; w < HUFF_BC;) { value = (p[i] & nibble_mask) >> nibble_shift; if(nibble_mask == 0x0F) ++i; nibble_mask ^= 0xFF; nibble_shift ^= 4; /* Values smaller than 15 is data, so we write it directly. Value 15 * is a flag telling us that we need to unpack more bytes. */ if(value == ESCAPE) { value = (p[i] & nibble_mask) >> nibble_shift; if(nibble_mask == 0x0F) ++i; nibble_mask ^= 0xFF; nibble_shift ^= 4; if(value == 0) { /* We sometimes need to write the actual value of 15, so this * case handles that. */ bit_length[w++] = ESCAPE; } else { int k; /* Fill zeroes. */ for(k = 0; k < value + 2; k++) { bit_length[w++] = 0; } } } else { bit_length[w++] = value; } } rar->bits.in_addr = i; rar->bits.bit_addr = nibble_shift ^ 4; ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC); if(ret != ARCHIVE_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Decoding huffman tables failed"); return ARCHIVE_FATAL; } for(i = 0; i < HUFF_TABLE_SIZE;) { uint16_t num; ret = decode_number(a, &rar->cstate.bd, p, &num); if(ret != ARCHIVE_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Decoding huffman tables failed"); return ARCHIVE_FATAL; } if(num < 16) { /* 0..15: store directly */ table[i] = (uint8_t) num; i++; continue; } if(num < 18) { /* 16..17: repeat previous code */ uint16_t n; if(ARCHIVE_OK != read_bits_16(rar, p, &n)) return ARCHIVE_EOF; if(num == 16) { n >>= 13; n += 3; skip_bits(rar, 3); } else { n >>= 9; n += 11; skip_bits(rar, 7); } if(i > 0) { while(n-- > 0 && i < HUFF_TABLE_SIZE) { table[i] = table[i - 1]; i++; } } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unexpected error when decoding huffman tables"); return ARCHIVE_FATAL; } continue; } /* other codes: fill with zeroes `n` times */ uint16_t n; if(ARCHIVE_OK != read_bits_16(rar, p, &n)) return ARCHIVE_EOF; if(num == 18) { n >>= 13; n += 3; skip_bits(rar, 3); } else { n >>= 9; n += 11; skip_bits(rar, 7); } while(n-- > 0 && i < HUFF_TABLE_SIZE) table[i++] = 0; } ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC); if(ret != ARCHIVE_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Failed to create literal table"); return ARCHIVE_FATAL; } idx += HUFF_NC; ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC); if(ret != ARCHIVE_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Failed to create distance table"); return ARCHIVE_FATAL; } idx += HUFF_DC; ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC); if(ret != ARCHIVE_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Failed to create lower bits of distances table"); return ARCHIVE_FATAL; } idx += HUFF_LDC; ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC); if(ret != ARCHIVE_OK) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Failed to create repeating distances table"); return ARCHIVE_FATAL; } return ARCHIVE_OK; } /* Parses the block header, verifies its CRC byte, and saves the header * fields inside the `hdr` pointer. */ static int parse_block_header(struct archive_read* a, const uint8_t* p, ssize_t* block_size, struct compressed_block_header* hdr) { memcpy(hdr, p, sizeof(struct compressed_block_header)); - if(hdr->block_flags.byte_count > 2) { + if(bf_byte_count(hdr) > 2) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported block header size (was %d, max is 2)", - hdr->block_flags.byte_count); + bf_byte_count(hdr)); return ARCHIVE_FATAL; } /* This should probably use bit reader interface in order to be more * future-proof. */ *block_size = 0; - switch(hdr->block_flags.byte_count) { + switch(bf_byte_count(hdr)) { /* 1-byte block size */ case 0: *block_size = *(const uint8_t*) &p[2]; break; /* 2-byte block size */ case 1: - *block_size = *(const uint16_t*) &p[2]; + *block_size = archive_le16dec(&p[2]); break; /* 3-byte block size */ case 2: - *block_size = *(const uint32_t*) &p[2]; + *block_size = archive_le32dec(&p[2]); *block_size &= 0x00FFFFFF; break; /* Other block sizes are not supported. This case is not reached, * because we have an 'if' guard before the switch that makes sure * of it. */ default: return ARCHIVE_FATAL; } /* Verify the block header checksum. 0x5A is a magic value and is always * constant. */ uint8_t calculated_cksum = 0x5A ^ (uint8_t) hdr->block_flags_u8 ^ (uint8_t) *block_size ^ (uint8_t) (*block_size >> 8) ^ (uint8_t) (*block_size >> 16); if(calculated_cksum != hdr->block_cksum) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Block checksum error: got 0x%02x, expected 0x%02x", hdr->block_cksum, calculated_cksum); return ARCHIVE_FATAL; } return ARCHIVE_OK; } /* Convenience function used during filter processing. */ static int parse_filter_data(struct rar5* rar, const uint8_t* p, uint32_t* filter_data) { int i, bytes; uint32_t data = 0; if(ARCHIVE_OK != read_consume_bits(rar, p, 2, &bytes)) return ARCHIVE_EOF; bytes++; for(i = 0; i < bytes; i++) { uint16_t byte; if(ARCHIVE_OK != read_bits_16(rar, p, &byte)) { return ARCHIVE_EOF; } data += (byte >> 8) << (i * 8); skip_bits(rar, 8); } *filter_data = data; return ARCHIVE_OK; } /* Function is used during sanity checking. */ static int is_valid_filter_block_start(struct rar5* rar, uint32_t start) { const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr; const int64_t last_bs = rar->cstate.last_block_start; const ssize_t last_bl = rar->cstate.last_block_length; if(last_bs == 0 || last_bl == 0) { /* We didn't have any filters yet, so accept this offset. */ return 1; } if(block_start >= last_bs + last_bl) { /* Current offset is bigger than last block's end offset, so * accept current offset. */ return 1; } /* Any other case is not a normal situation and we should fail. */ return 0; } /* The function will create a new filter, read its parameters from the input * stream and add it to the filter collection. */ static int parse_filter(struct archive_read* ar, const uint8_t* p) { uint32_t block_start, block_length; uint16_t filter_type; struct rar5* rar = get_context(ar); /* Read the parameters from the input stream. */ if(ARCHIVE_OK != parse_filter_data(rar, p, &block_start)) return ARCHIVE_EOF; if(ARCHIVE_OK != parse_filter_data(rar, p, &block_length)) return ARCHIVE_EOF; if(ARCHIVE_OK != read_bits_16(rar, p, &filter_type)) return ARCHIVE_EOF; filter_type >>= 13; skip_bits(rar, 3); /* Perform some sanity checks on this filter parameters. Note that we * allow only DELTA, E8/E9 and ARM filters here, because rest of filters * are not used in RARv5. */ if(block_length < 4 || block_length > 0x400000 || filter_type > FILTER_ARM || !is_valid_filter_block_start(rar, block_start)) { archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Invalid " "filter encountered"); return ARCHIVE_FATAL; } /* Allocate a new filter. */ struct filter_info* filt = add_new_filter(rar); if(filt == NULL) { archive_set_error(&ar->archive, ENOMEM, "Can't allocate memory for a " "filter descriptor."); return ARCHIVE_FATAL; } filt->type = filter_type; filt->block_start = rar->cstate.write_ptr + block_start; filt->block_length = block_length; rar->cstate.last_block_start = filt->block_start; rar->cstate.last_block_length = filt->block_length; /* Read some more data in case this is a DELTA filter. Other filter types * don't require any additional data over what was already read. */ if(filter_type == FILTER_DELTA) { int channels; if(ARCHIVE_OK != read_consume_bits(rar, p, 5, &channels)) return ARCHIVE_EOF; filt->channels = channels + 1; } return ARCHIVE_OK; } static int decode_code_length(struct rar5* rar, const uint8_t* p, uint16_t code) { int lbits, length = 2; if(code < 8) { lbits = 0; length += code; } else { lbits = code / 4 - 1; length += (4 | (code & 3)) << lbits; } if(lbits > 0) { int add; if(ARCHIVE_OK != read_consume_bits(rar, p, lbits, &add)) return -1; length += add; } return length; } static int copy_string(struct archive_read* a, int len, int dist) { struct rar5* rar = get_context(a); const int cmask = rar->cstate.window_mask; const int64_t write_ptr = rar->cstate.write_ptr + rar->cstate.solid_offset; int i; /* The unpacker spends most of the time in this function. It would be * a good idea to introduce some optimizations here. * * Just remember that this loop treats buffers that overlap differently * than buffers that do not overlap. This is why a simple memcpy(3) call * will not be enough. */ for(i = 0; i < len; i++) { const ssize_t write_idx = (write_ptr + i) & cmask; const ssize_t read_idx = (write_ptr + i - dist) & cmask; rar->cstate.window_buf[write_idx] = rar->cstate.window_buf[read_idx]; } rar->cstate.write_ptr += len; return ARCHIVE_OK; } static int do_uncompress_block(struct archive_read* a, const uint8_t* p) { struct rar5* rar = get_context(a); uint16_t num; int ret; const int cmask = rar->cstate.window_mask; const struct compressed_block_header* hdr = &rar->last_block_hdr; - const uint8_t bit_size = 1 + hdr->block_flags.bit_size; + const uint8_t bit_size = 1 + bf_bit_size(hdr); while(1) { if(rar->cstate.write_ptr - rar->cstate.last_write_ptr > (rar->cstate.window_size >> 1)) { /* Don't allow growing data by more than half of the window size * at a time. In such case, break the loop; next call to this * function will continue processing from this moment. */ break; } if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 || (rar->bits.in_addr == rar->cstate.cur_block_size - 1 && rar->bits.bit_addr >= bit_size)) { /* If the program counter is here, it means the function has * finished processing the block. */ rar->cstate.block_parsing_finished = 1; break; } /* Decode the next literal. */ if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) { return ARCHIVE_EOF; } /* Num holds a decompression literal, or 'command code'. * * - Values lower than 256 are just bytes. Those codes can be stored * in the output buffer directly. * * - Code 256 defines a new filter, which is later used to transform * the data block accordingly to the filter type. The data block * needs to be fully uncompressed first. * * - Code bigger than 257 and smaller than 262 define a repetition * pattern that should be copied from an already uncompressed chunk * of data. */ if(num < 256) { /* Directly store the byte. */ int64_t write_idx = rar->cstate.solid_offset + rar->cstate.write_ptr++; rar->cstate.window_buf[write_idx & cmask] = (uint8_t) num; continue; } else if(num >= 262) { uint16_t dist_slot; int len = decode_code_length(rar, p, num - 262), dbits, dist = 1; if(len == -1) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Failed to decode the code length"); return ARCHIVE_FATAL; } if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p, &dist_slot)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Failed to decode the distance slot"); return ARCHIVE_FATAL; } if(dist_slot < 4) { dbits = 0; dist += dist_slot; } else { dbits = dist_slot / 2 - 1; dist += (2 | (dist_slot & 1)) << dbits; } if(dbits > 0) { if(dbits >= 4) { uint32_t add = 0; uint16_t low_dist; if(dbits > 4) { if(ARCHIVE_OK != read_bits_32(rar, p, &add)) { /* Return EOF if we can't read more data. */ return ARCHIVE_EOF; } skip_bits(rar, dbits - 4); add = (add >> (36 - dbits)) << 4; dist += add; } if(ARCHIVE_OK != decode_number(a, &rar->cstate.ldd, p, &low_dist)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Failed to decode the distance slot"); return ARCHIVE_FATAL; } dist += low_dist; } else { /* dbits is one of [0,1,2,3] */ int add; if(ARCHIVE_OK != read_consume_bits(rar, p, dbits, &add)) { /* Return EOF if we can't read more data. */ return ARCHIVE_EOF; } dist += add; } } if(dist > 0x100) { len++; if(dist > 0x2000) { len++; if(dist > 0x40000) { len++; } } } dist_cache_push(rar, dist); rar->cstate.last_len = len; if(ARCHIVE_OK != copy_string(a, len, dist)) return ARCHIVE_FATAL; continue; } else if(num == 256) { /* Create a filter. */ ret = parse_filter(a, p); if(ret != ARCHIVE_OK) return ret; continue; } else if(num == 257) { if(rar->cstate.last_len != 0) { if(ARCHIVE_OK != copy_string(a, rar->cstate.last_len, rar->cstate.dist_cache[0])) { return ARCHIVE_FATAL; } } continue; } else if(num < 262) { const int idx = num - 258; const int dist = dist_cache_touch(rar, idx); uint16_t len_slot; int len; if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p, &len_slot)) { return ARCHIVE_FATAL; } len = decode_code_length(rar, p, len_slot); rar->cstate.last_len = len; if(ARCHIVE_OK != copy_string(a, len, dist)) return ARCHIVE_FATAL; continue; } /* The program counter shouldn't reach here. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported block code: 0x%02x", num); return ARCHIVE_FATAL; } return ARCHIVE_OK; } /* Binary search for the RARv5 signature. */ static int scan_for_signature(struct archive_read* a) { const uint8_t* p; const int chunk_size = 512; ssize_t i; /* If we're here, it means we're on an 'unknown territory' data. * There's no indication what kind of data we're reading here. It could be * some text comment, any kind of binary data, digital sign, dragons, etc. * * We want to find a valid RARv5 magic header inside this unknown data. */ /* Is it possible in libarchive to just skip everything until the * end of the file? If so, it would be a better approach than the * current implementation of this function. */ while(1) { if(!read_ahead(a, chunk_size, &p)) return ARCHIVE_EOF; for(i = 0; i < chunk_size - rar5_signature_size; i++) { if(memcmp(&p[i], rar5_signature, rar5_signature_size) == 0) { /* Consume the number of bytes we've used to search for the * signature, as well as the number of bytes used by the * signature itself. After this we should be standing on a * valid base block header. */ (void) consume(a, i + rar5_signature_size); return ARCHIVE_OK; } } consume(a, chunk_size); } return ARCHIVE_FATAL; } /* This function will switch the multivolume archive file to another file, * i.e. from part03 to part 04. */ static int advance_multivolume(struct archive_read* a) { int lret; struct rar5* rar = get_context(a); /* A small state machine that will skip unnecessary data, needed to * switch from one multivolume to another. Such skipping is needed if * we want to be an stream-oriented (instead of file-oriented) * unpacker. * * The state machine starts with `rar->main.endarc` == 0. It also * assumes that current stream pointer points to some base block header. * * The `endarc` field is being set when the base block parsing function * encounters the 'end of archive' marker. */ while(1) { if(rar->main.endarc == 1) { rar->main.endarc = 0; while(ARCHIVE_RETRY == skip_base_block(a)); break; } else { /* Skip current base block. In order to properly skip it, * we really need to simply parse it and discard the results. */ lret = skip_base_block(a); /* The `skip_base_block` function tells us if we should continue * with skipping, or we should stop skipping. We're trying to skip * everything up to a base FILE block. */ if(lret != ARCHIVE_RETRY) { /* If there was an error during skipping, or we have just * skipped a FILE base block... */ if(rar->main.endarc == 0) { return lret; } else { continue; } } } } return ARCHIVE_OK; } /* Merges the partial block from the first multivolume archive file, and * partial block from the second multivolume archive file. The result is * a chunk of memory containing the whole block, and the stream pointer * is advanced to the next block in the second multivolume archive file. */ static int merge_block(struct archive_read* a, ssize_t block_size, const uint8_t** p) { struct rar5* rar = get_context(a); ssize_t cur_block_size, partial_offset = 0; const uint8_t* lp; int ret; /* Set a flag that we're in the switching mode. */ rar->cstate.switch_multivolume = 1; /* Reallocate the memory which will hold the whole block. */ if(rar->vol.push_buf) free((void*) rar->vol.push_buf); rar->vol.push_buf = malloc(block_size); if(!rar->vol.push_buf) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for a " "merge block buffer."); return ARCHIVE_FATAL; } /* A single block can span across multiple multivolume archive files, * so we use a loop here. This loop will consume enough multivolume * archive files until the whole block is read. */ while(1) { /* Get the size of current block chunk in this multivolume archive * file and read it. */ cur_block_size = rar5_min(rar->file.bytes_remaining, block_size - partial_offset); if(cur_block_size == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Encountered block size == 0 during block merge"); return ARCHIVE_FATAL; } if(!read_ahead(a, cur_block_size, &lp)) return ARCHIVE_EOF; /* Sanity check; there should never be a situation where this function * reads more data than the block's size. */ if(partial_offset + cur_block_size > block_size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Consumed too much data when merging blocks."); return ARCHIVE_FATAL; } /* Merge previous block chunk with current block chunk, or create * first block chunk if this is our first iteration. */ memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size); /* Advance the stream read pointer by this block chunk size. */ if(ARCHIVE_OK != consume(a, cur_block_size)) return ARCHIVE_EOF; /* Update the pointers. `partial_offset` contains information about * the sum of merged block chunks. */ partial_offset += cur_block_size; rar->file.bytes_remaining -= cur_block_size; /* If `partial_offset` is the same as `block_size`, this means we've * merged all block chunks and we have a valid full block. */ if(partial_offset == block_size) { break; } /* If we don't have any bytes to read, this means we should switch * to another multivolume archive file. */ if(rar->file.bytes_remaining == 0) { ret = advance_multivolume(a); if(ret != ARCHIVE_OK) return ret; } } *p = rar->vol.push_buf; /* If we're here, we can resume unpacking by processing the block pointed * to by the `*p` memory pointer. */ return ARCHIVE_OK; } static int process_block(struct archive_read* a) { const uint8_t* p; struct rar5* rar = get_context(a); int ret; /* If we don't have any data to be processed, this most probably means * we need to switch to the next volume. */ if(rar->main.volume && rar->file.bytes_remaining == 0) { ret = advance_multivolume(a); if(ret != ARCHIVE_OK) return ret; } if(rar->cstate.block_parsing_finished) { ssize_t block_size; rar->cstate.block_parsing_finished = 0; /* The header size won't be bigger than 6 bytes. */ if(!read_ahead(a, 6, &p)) { /* Failed to prefetch data block header. */ return ARCHIVE_EOF; } /* * Read block_size by parsing block header. Validate the header by * calculating CRC byte stored inside the header. Size of the header is * not constant (block size can be stored either in 1 or 2 bytes), * that's why block size is left out from the `compressed_block_header` * structure and returned by `parse_block_header` as the second * argument. */ ret = parse_block_header(a, p, &block_size, &rar->last_block_hdr); if(ret != ARCHIVE_OK) return ret; /* Skip block header. Next data is huffman tables, if present. */ ssize_t to_skip = sizeof(struct compressed_block_header) + - rar->last_block_hdr.block_flags.byte_count + 1; + bf_byte_count(&rar->last_block_hdr) + 1; if(ARCHIVE_OK != consume(a, to_skip)) return ARCHIVE_EOF; rar->file.bytes_remaining -= to_skip; /* The block size gives information about the whole block size, but * the block could be stored in split form when using multi-volume * archives. In this case, the block size will be bigger than the * actual data stored in this file. Remaining part of the data will * be in another file. */ ssize_t cur_block_size = rar5_min(rar->file.bytes_remaining, block_size); if(block_size > rar->file.bytes_remaining) { /* If current blocks' size is bigger than our data size, this * means we have a multivolume archive. In this case, skip * all base headers until the end of the file, proceed to next * "partXXX.rar" volume, find its signature, skip all headers up * to the first FILE base header, and continue from there. * * Note that `merge_block` will update the `rar` context structure * quite extensively. */ ret = merge_block(a, block_size, &p); if(ret != ARCHIVE_OK) { return ret; } cur_block_size = block_size; /* Current stream pointer should be now directly *after* the * block that spanned through multiple archive files. `p` pointer * should have the data of the *whole* block (merged from * partial blocks stored in multiple archives files). */ } else { rar->cstate.switch_multivolume = 0; /* Read the whole block size into memory. This can take up to * 8 megabytes of memory in theoretical cases. Might be worth to * optimize this and use a standard chunk of 4kb's. */ if(!read_ahead(a, 4 + cur_block_size, &p)) { /* Failed to prefetch block data. */ return ARCHIVE_EOF; } } rar->cstate.block_buf = p; rar->cstate.cur_block_size = cur_block_size; rar->bits.in_addr = 0; rar->bits.bit_addr = 0; - if(rar->last_block_hdr.block_flags.is_table_present) { + if(bf_is_table_present(&rar->last_block_hdr)) { /* Load Huffman tables. */ ret = parse_tables(a, rar, p); if(ret != ARCHIVE_OK) { /* Error during decompression of Huffman tables. */ return ret; } } } else { p = rar->cstate.block_buf; } /* Uncompress the block, or a part of it, depending on how many bytes * will be generated by uncompressing the block. * * In case too many bytes will be generated, calling this function again * will resume the uncompression operation. */ ret = do_uncompress_block(a, p); if(ret != ARCHIVE_OK) { return ret; } if(rar->cstate.block_parsing_finished && rar->cstate.switch_multivolume == 0 && rar->cstate.cur_block_size > 0) { /* If we're processing a normal block, consume the whole block. We * can do this because we've already read the whole block to memory. */ if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size)) return ARCHIVE_FATAL; rar->file.bytes_remaining -= rar->cstate.cur_block_size; } else if(rar->cstate.switch_multivolume) { /* Don't consume the block if we're doing multivolume processing. * The volume switching function will consume the proper count of * bytes instead. */ rar->cstate.switch_multivolume = 0; } return ARCHIVE_OK; } /* Pops the `buf`, `size` and `offset` from the "data ready" stack. * * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY * when there is no data on the stack. */ static int use_data(struct rar5* rar, const void** buf, size_t* size, int64_t* offset) { int i; for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { struct data_ready *d = &rar->cstate.dready[i]; if(d->used) { if(buf) *buf = d->buf; if(size) *size = d->size; if(offset) *offset = d->offset; d->used = 0; return ARCHIVE_OK; } } return ARCHIVE_RETRY; } /* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready * FIFO stack. Those values will be popped from this stack by the `use_data` * function. */ static int push_data_ready(struct archive_read* a, struct rar5* rar, const uint8_t* buf, size_t size, int64_t offset) { int i; /* Don't push if we're in skip mode. This is needed because solid * streams need full processing even if we're skipping data. After fully * processing the stream, we need to discard the generated bytes, because * we're interested only in the side effect: building up the internal * window circular buffer. This window buffer will be used later during * unpacking of requested data. */ if(rar->skip_mode) return ARCHIVE_OK; /* Sanity check. */ if(offset != rar->file.last_offset + rar->file.last_size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Sanity " "check error: output stream is not continuous"); return ARCHIVE_FATAL; } for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { struct data_ready* d = &rar->cstate.dready[i]; if(!d->used) { d->used = 1; d->buf = buf; d->size = size; d->offset = offset; /* These fields are used only in sanity checking. */ rar->file.last_offset = offset; rar->file.last_size = size; /* Calculate the checksum of this new block before submitting * data to libarchive's engine. */ update_crc(rar, d->buf, d->size); return ARCHIVE_OK; } } /* Program counter will reach this code if the `rar->cstate.data_ready` * stack will be filled up so that no new entries will be allowed. The * code shouldn't allow such situation to occur. So we treat this case * as an internal error. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Error: " "premature end of data_ready stack"); return ARCHIVE_FATAL; } /* This function uncompresses the data that is stored in the base * block. * * The FILE base block looks like this: * *
... * * The
is a block header, that is parsed in parse_block_header(). * It's a "compressed_block_header" structure, containing metadata needed * to know when we should stop looking for more blocks. * * contain data needed to set up the huffman tables, needed * for the actual decompression. * * Each consists of series of literals: * * ... * * Those literals generate the uncompression data. They operate on a circular * buffer, sometimes writing raw data into it, sometimes referencing * some previous data inside this buffer, and sometimes declaring a filter * that will need to be executed on the data stored in the circular buffer. * It all depends on the literal that is used. * * Sometimes blocks produce output data, sometimes they don't. For example, for * some huge files that use lots of filters, sometimes a block is filled with * only filter declaration literals. Such blocks won't produce any data in the * circular buffer. * * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte, * because a literal can reference previously decompressed data. For example, * there can be a literal that says: 'append a byte 0xFE here', and after * it another literal can say 'append 1 megabyte of data from circular buffer * offset 0x12345'. This is how RAR format handles compressing repeated * patterns. * * The RAR compressor creates those literals and the actual efficiency of * compression depends on what those literals are. The literals can also * be seen as a kind of a non-turing-complete virtual machine that simply * tells the decompressor what it should do. * */ static int do_uncompress_file(struct archive_read* a) { struct rar5* rar = get_context(a); int ret; int64_t max_end_pos; if(!rar->cstate.initialized) { /* Don't perform full context reinitialization if we're processing * a solid archive. */ if(!rar->main.solid || !rar->cstate.window_buf) { init_unpack(rar); } rar->cstate.initialized = 1; } if(rar->cstate.all_filters_applied == 1) { /* We use while(1) here, but standard case allows for just 1 iteration. * The loop will iterate if process_block() didn't generate any data at * all. This can happen if the block contains only filter definitions * (this is common in big files). */ while(1) { ret = process_block(a); if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL) return ret; if(rar->cstate.last_write_ptr == rar->cstate.write_ptr) { /* The block didn't generate any new data, so just process * a new block. */ continue; } /* The block has generated some new data, so break the loop. */ break; } } /* Try to run filters. If filters won't be applied, it means that * insufficient data was generated. */ ret = apply_filters(a); if(ret == ARCHIVE_RETRY) { return ARCHIVE_OK; } else if(ret == ARCHIVE_FATAL) { return ARCHIVE_FATAL; } /* If apply_filters() will return ARCHIVE_OK, we can continue here. */ if(cdeque_size(&rar->cstate.filters) > 0) { /* Check if we can write something before hitting first filter. */ struct filter_info* flt; /* Get the block_start offset from the first filter. */ if(CDE_OK != cdeque_front(&rar->cstate.filters, cdeque_filter_p(&flt))) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Can't read first filter"); return ARCHIVE_FATAL; } max_end_pos = rar5_min(flt->block_start, rar->cstate.write_ptr); } else { /* There are no filters defined, or all filters were applied. This * means we can just store the data without any postprocessing. */ max_end_pos = rar->cstate.write_ptr; } if(max_end_pos == rar->cstate.last_write_ptr) { /* We can't write anything yet. The block uncompression function did * not generate enough data, and no filter can be applied. At the same * time we don't have any data that can be stored without filter * postprocessing. This means we need to wait for more data to be * generated, so we can apply the filters. * * Signal the caller that we need more data to be able to do anything. */ return ARCHIVE_RETRY; } else { /* We can write the data before hitting the first filter. So let's * do it. The push_window_data() function will effectively return * the selected data block to the user application. */ push_window_data(a, rar, rar->cstate.last_write_ptr, max_end_pos); rar->cstate.last_write_ptr = max_end_pos; } return ARCHIVE_OK; } static int uncompress_file(struct archive_read* a) { int ret; while(1) { /* Sometimes the uncompression function will return a 'retry' signal. * If this will happen, we have to retry the function. */ ret = do_uncompress_file(a); if(ret != ARCHIVE_RETRY) return ret; } } static int do_unstore_file(struct archive_read* a, struct rar5* rar, const void** buf, size_t* size, int64_t* offset) { const uint8_t* p; if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 && rar->generic.split_after > 0) { int ret; rar->cstate.switch_multivolume = 1; ret = advance_multivolume(a); rar->cstate.switch_multivolume = 0; if(ret != ARCHIVE_OK) { /* Failed to advance to next multivolume archive file. */ return ret; } } size_t to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024); if(to_read == 0) { return ARCHIVE_EOF; } if(!read_ahead(a, to_read, &p)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "I/O error " "when unstoring file"); return ARCHIVE_FATAL; } if(ARCHIVE_OK != consume(a, to_read)) { return ARCHIVE_EOF; } if(buf) *buf = p; if(size) *size = to_read; if(offset) *offset = rar->cstate.last_unstore_ptr; rar->file.bytes_remaining -= to_read; rar->cstate.last_unstore_ptr += to_read; update_crc(rar, p, to_read); return ARCHIVE_OK; } static int do_unpack(struct archive_read* a, struct rar5* rar, const void** buf, size_t* size, int64_t* offset) { enum COMPRESSION_METHOD { STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4, BEST = 5 }; if(rar->file.service > 0) { return do_unstore_file(a, rar, buf, size, offset); } else { switch(rar->cstate.method) { case STORE: return do_unstore_file(a, rar, buf, size, offset); case FASTEST: /* fallthrough */ case FAST: /* fallthrough */ case NORMAL: /* fallthrough */ case GOOD: /* fallthrough */ case BEST: return uncompress_file(a); default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Compression method not supported: 0x%08x", rar->cstate.method); return ARCHIVE_FATAL; } } #if !defined WIN32 /* Not reached. */ return ARCHIVE_OK; #endif } static int verify_checksums(struct archive_read* a) { int verify_crc; struct rar5* rar = get_context(a); /* Check checksums only when actually unpacking the data. There's no need * to calculate checksum when we're skipping data in solid archives * (skipping in solid archives is the same thing as unpacking compressed * data and discarding the result). */ if(!rar->skip_mode) { /* Always check checksums if we're not in skip mode */ verify_crc = 1; } else { /* We can override the logic above with a compile-time option * NO_CRC_ON_SOLID_SKIP. This option is used during debugging, and it * will check checksums of unpacked data even when we're skipping it. */ #if defined CHECK_CRC_ON_SOLID_SKIP /* Debug case */ verify_crc = 1; #else /* Normal case */ verify_crc = 0; #endif } if(verify_crc) { /* During unpacking, on each unpacked block we're calling the * update_crc() function. Since we are here, the unpacking process is * already over and we can check if calculated checksum (CRC32 or * BLAKE2sp) is the same as what is stored in the archive. */ if(rar->file.stored_crc32 > 0) { /* Check CRC32 only when the file contains a CRC32 value for this * file. */ if(rar->file.calculated_crc32 != rar->file.stored_crc32) { /* Checksums do not match; the unpacked file is corrupted. */ DEBUG_CODE { printf("Checksum error: CRC32 (was: %08x, expected: %08x)\n", rar->file.calculated_crc32, rar->file.stored_crc32); } #ifndef DONT_FAIL_ON_CRC_ERROR archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Checksum error: CRC32"); return ARCHIVE_FATAL; #endif } else { DEBUG_CODE { printf("Checksum OK: CRC32 (%08x/%08x)\n", rar->file.stored_crc32, rar->file.calculated_crc32); } } } if(rar->file.has_blake2 > 0) { /* BLAKE2sp is an optional checksum algorithm that is added to * RARv5 archives when using the `-htb` switch during creation of * archive. * * We now finalize the hash calculation by calling the `final` * function. This will generate the final hash value we can use to * compare it with the BLAKE2sp checksum that is stored in the * archive. * * The return value of this `final` function is not very helpful, * as it guards only against improper use. This is why we're * explicitly ignoring it. */ uint8_t b2_buf[32]; (void) blake2sp_final(&rar->file.b2state, b2_buf, 32); if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) { #ifndef DONT_FAIL_ON_CRC_ERROR archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Checksum error: BLAKE2"); return ARCHIVE_FATAL; #endif } } } /* Finalization for this file has been successfully completed. */ return ARCHIVE_OK; } static int verify_global_checksums(struct archive_read* a) { return verify_checksums(a); } static int rar5_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { int ret; struct rar5* rar = get_context(a); if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Unpacker has written too many bytes"); return ARCHIVE_FATAL; } ret = use_data(rar, buff, size, offset); if(ret == ARCHIVE_OK) { return ret; } if(rar->file.eof == 1) { return ARCHIVE_EOF; } ret = do_unpack(a, rar, buff, size, offset); if(ret != ARCHIVE_OK) { return ret; } if(rar->file.bytes_remaining == 0 && rar->cstate.last_write_ptr == rar->file.unpacked_size) { /* If all bytes of current file were processed, run finalization. * * Finalization will check checksum against proper values. If * some of the checksums will not match, we'll return an error * value in the last `archive_read_data` call to signal an error * to the user. */ rar->file.eof = 1; return verify_global_checksums(a); } return ARCHIVE_OK; } static int rar5_read_data_skip(struct archive_read *a) { struct rar5* rar = get_context(a); if(rar->main.solid) { /* In solid archives, instead of skipping the data, we need to extract * it, and dispose the result. The side effect of this operation will * be setting up the initial window buffer state needed to be able to * extract the selected file. */ int ret; /* Make sure to process all blocks in the compressed stream. */ while(rar->file.bytes_remaining > 0) { /* Setting the "skip mode" will allow us to skip checksum checks * during data skipping. Checking the checksum of skipped data * isn't really necessary and it's only slowing things down. * * This is incremented instead of setting to 1 because this data * skipping function can be called recursively. */ rar->skip_mode++; /* We're disposing 1 block of data, so we use triple NULLs in * arguments. */ ret = rar5_read_data(a, NULL, NULL, NULL); /* Turn off "skip mode". */ rar->skip_mode--; if(ret < 0) { /* Propagate any potential error conditions to the caller. */ return ret; } } } else { /* In standard archives, we can just jump over the compressed stream. * Each file in non-solid archives starts from an empty window buffer. */ if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) { return ARCHIVE_FATAL; } rar->file.bytes_remaining = 0; } return ARCHIVE_OK; } static int64_t rar5_seek_data(struct archive_read *a, int64_t offset, int whence) { (void) a; (void) offset; (void) whence; /* We're a streaming unpacker, and we don't support seeking. */ return ARCHIVE_FATAL; } static int rar5_cleanup(struct archive_read *a) { struct rar5* rar = get_context(a); if(rar->cstate.window_buf) free(rar->cstate.window_buf); if(rar->cstate.filtered_buf) free(rar->cstate.filtered_buf); if(rar->vol.push_buf) free(rar->vol.push_buf); free_filters(rar); cdeque_free(&rar->cstate.filters); free(rar); a->format->data = NULL; return ARCHIVE_OK; } static int rar5_capabilities(struct archive_read * a) { (void) a; return 0; } static int rar5_has_encrypted_entries(struct archive_read *_a) { (void) _a; /* Unsupported for now. */ return ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED; } static int rar5_init(struct rar5* rar) { ssize_t i; memset(rar, 0, sizeof(struct rar5)); /* Decrypt the magic signature pattern. Check the comment near the * `rar5_signature` symbol to read the rationale behind this. */ if(rar5_signature[0] == 243) { for(i = 0; i < rar5_signature_size; i++) { rar5_signature[i] ^= 0xA1; } } if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192)) return ARCHIVE_FATAL; return ARCHIVE_OK; } int archive_read_support_format_rar5(struct archive *_a) { struct archive_read* ar; int ret; struct rar5* rar; if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar))) return ret; rar = malloc(sizeof(*rar)); if(rar == NULL) { archive_set_error(&ar->archive, ENOMEM, "Can't allocate rar5 data"); return ARCHIVE_FATAL; } if(ARCHIVE_OK != rar5_init(rar)) { archive_set_error(&ar->archive, ENOMEM, "Can't allocate rar5 filter " "buffer"); return ARCHIVE_FATAL; } ret = __archive_read_register_format(ar, rar, "rar5", rar5_bid, rar5_options, rar5_read_header, rar5_read_data, rar5_read_data_skip, rar5_seek_data, rar5_cleanup, rar5_capabilities, rar5_has_encrypted_entries); if(ret != ARCHIVE_OK) { (void) rar5_cleanup(ar); } return ret; } Index: head/contrib/libarchive/libarchive/archive_read_support_format_warc.c =================================================================== --- head/contrib/libarchive/libarchive/archive_read_support_format_warc.c (revision 342041) +++ head/contrib/libarchive/libarchive/archive_read_support_format_warc.c (revision 342042) @@ -1,825 +1,830 @@ /*- * Copyright (c) 2014 Sebastian Freundt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); /** * WARC is standardised by ISO TC46/SC4/WG12 and currently available as * ISO 28500:2009. * For the purposes of this file we used the final draft from: * http://bibnum.bnf.fr/warc/WARC_ISO_28500_version1_latestdraft.pdf * * Todo: * [ ] real-world warcs can contain resources at endpoints ending in / * e.g. http://bibnum.bnf.fr/warc/ * if you're lucky their response contains a Content-Location: header * pointing to a unix-compliant filename, in the example above it's * Content-Location: http://bibnum.bnf.fr/warc/index.html * however, that's not mandated and github for example doesn't follow * this convention. * We need a set of archive options to control what to do with * entries like these, at the moment care is taken to skip them. * **/ #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_LIMITS_H #include #endif #ifdef HAVE_CTYPE_H #include #endif #ifdef HAVE_TIME_H #include #endif #include "archive.h" #include "archive_entry.h" #include "archive_private.h" #include "archive_read_private.h" typedef enum { WT_NONE, /* warcinfo */ WT_INFO, /* metadata */ WT_META, /* resource */ WT_RSRC, /* request, unsupported */ WT_REQ, /* response, unsupported */ WT_RSP, /* revisit, unsupported */ WT_RVIS, /* conversion, unsupported */ WT_CONV, /* continuation, unsupported at the moment */ WT_CONT, /* invalid type */ LAST_WT } warc_type_t; typedef struct { size_t len; const char *str; } warc_string_t; typedef struct { size_t len; char *str; } warc_strbuf_t; struct warc_s { /* content length ahead */ size_t cntlen; /* and how much we've processed so far */ size_t cntoff; /* and how much we need to consume between calls */ size_t unconsumed; /* string pool */ warc_strbuf_t pool; /* previous version */ unsigned int pver; /* stringified format name */ struct archive_string sver; }; static int _warc_bid(struct archive_read *a, int); static int _warc_cleanup(struct archive_read *a); static int _warc_read(struct archive_read*, const void**, size_t*, int64_t*); static int _warc_skip(struct archive_read *a); static int _warc_rdhdr(struct archive_read *a, struct archive_entry *e); /* private routines */ static unsigned int _warc_rdver(const char buf[10], size_t bsz); static unsigned int _warc_rdtyp(const char *buf, size_t bsz); static warc_string_t _warc_rduri(const char *buf, size_t bsz); static ssize_t _warc_rdlen(const char *buf, size_t bsz); static time_t _warc_rdrtm(const char *buf, size_t bsz); static time_t _warc_rdmtm(const char *buf, size_t bsz); static const char *_warc_find_eoh(const char *buf, size_t bsz); static const char *_warc_find_eol(const char *buf, size_t bsz); int archive_read_support_format_warc(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct warc_s *w; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_warc"); if ((w = calloc(1, sizeof(*w))) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate warc data"); return (ARCHIVE_FATAL); } r = __archive_read_register_format( a, w, "warc", _warc_bid, NULL, _warc_rdhdr, _warc_read, _warc_skip, NULL, _warc_cleanup, NULL, NULL); if (r != ARCHIVE_OK) { free(w); return (r); } return (ARCHIVE_OK); } static int _warc_cleanup(struct archive_read *a) { struct warc_s *w = a->format->data; if (w->pool.len > 0U) { free(w->pool.str); } archive_string_free(&w->sver); free(w); a->format->data = NULL; return (ARCHIVE_OK); } static int _warc_bid(struct archive_read *a, int best_bid) { const char *hdr; ssize_t nrd; unsigned int ver; (void)best_bid; /* UNUSED */ /* check first line of file, it should be a record already */ if ((hdr = __archive_read_ahead(a, 12U, &nrd)) == NULL) { /* no idea what to do */ return -1; } else if (nrd < 12) { /* nah, not for us, our magic cookie is at least 12 bytes */ return -1; } /* otherwise snarf the record's version number */ ver = _warc_rdver(hdr, nrd); if (ver < 1200U || ver > 10000U) { /* we only support WARC 0.12 to 1.0 */ return -1; } /* otherwise be confident */ return (64); } static int _warc_rdhdr(struct archive_read *a, struct archive_entry *entry) { #define HDR_PROBE_LEN (12U) struct warc_s *w = a->format->data; unsigned int ver; const char *buf; ssize_t nrd; const char *eoh; /* for the file name, saves some strndup()'ing */ warc_string_t fnam; /* warc record type, not that we really use it a lot */ warc_type_t ftyp; /* content-length+error monad */ ssize_t cntlen; /* record time is the WARC-Date time we reinterpret it as ctime */ time_t rtime; /* mtime is the Last-Modified time which will be the entry's mtime */ time_t mtime; start_over: /* just use read_ahead() they keep track of unconsumed * bits and bobs for us; no need to put an extra shift in * and reproduce that functionality here */ buf = __archive_read_ahead(a, HDR_PROBE_LEN, &nrd); if (nrd < 0) { /* no good */ archive_set_error( &a->archive, ARCHIVE_ERRNO_MISC, "Bad record header"); return (ARCHIVE_FATAL); } else if (buf == NULL) { /* there should be room for at least WARC/bla\r\n * must be EOF therefore */ return (ARCHIVE_EOF); } /* looks good so far, try and find the end of the header now */ eoh = _warc_find_eoh(buf, nrd); if (eoh == NULL) { /* still no good, the header end might be beyond the * probe we've requested, but then again who'd cram * so much stuff into the header *and* be 28500-compliant */ archive_set_error( &a->archive, ARCHIVE_ERRNO_MISC, "Bad record header"); return (ARCHIVE_FATAL); } ver = _warc_rdver(buf, eoh - buf); /* we currently support WARC 0.12 to 1.0 */ if (ver == 0U) { archive_set_error( &a->archive, ARCHIVE_ERRNO_MISC, "Invalid record version"); return (ARCHIVE_FATAL); } else if (ver < 1200U || ver > 10000U) { archive_set_error( &a->archive, ARCHIVE_ERRNO_MISC, "Unsupported record version: %u.%u", ver / 10000, (ver % 10000) / 100); return (ARCHIVE_FATAL); } cntlen = _warc_rdlen(buf, eoh - buf); if (cntlen < 0) { /* nightmare! the specs say content-length is mandatory * so I don't feel overly bad stopping the reader here */ archive_set_error( &a->archive, EINVAL, "Bad content length"); return (ARCHIVE_FATAL); } rtime = _warc_rdrtm(buf, eoh - buf); if (rtime == (time_t)-1) { /* record time is mandatory as per WARC/1.0, * so just barf here, fast and loud */ archive_set_error( &a->archive, EINVAL, "Bad record time"); return (ARCHIVE_FATAL); } /* let the world know we're a WARC archive */ a->archive.archive_format = ARCHIVE_FORMAT_WARC; if (ver != w->pver) { /* stringify this entry's version */ archive_string_sprintf(&w->sver, "WARC/%u.%u", ver / 10000, (ver % 10000) / 100); /* remember the version */ w->pver = ver; } /* start off with the type */ ftyp = _warc_rdtyp(buf, eoh - buf); /* and let future calls know about the content */ w->cntlen = cntlen; w->cntoff = 0U; mtime = 0;/* Avoid compiling error on some platform. */ switch (ftyp) { case WT_RSRC: case WT_RSP: /* only try and read the filename in the cases that are * guaranteed to have one */ fnam = _warc_rduri(buf, eoh - buf); /* check the last character in the URI to avoid creating * directory endpoints as files, see Todo above */ if (fnam.len == 0 || fnam.str[fnam.len - 1] == '/') { /* break here for now */ fnam.len = 0U; fnam.str = NULL; break; } /* bang to our string pool, so we save a * malloc()+free() roundtrip */ if (fnam.len + 1U > w->pool.len) { w->pool.len = ((fnam.len + 64U) / 64U) * 64U; w->pool.str = realloc(w->pool.str, w->pool.len); } memcpy(w->pool.str, fnam.str, fnam.len); w->pool.str[fnam.len] = '\0'; /* let no one else know about the pool, it's a secret, shhh */ fnam.str = w->pool.str; /* snarf mtime or deduce from rtime * this is a custom header added by our writer, it's quite * hard to believe anyone else would go through with it * (apart from being part of some http responses of course) */ if ((mtime = _warc_rdmtm(buf, eoh - buf)) == (time_t)-1) { mtime = rtime; } break; default: fnam.len = 0U; fnam.str = NULL; break; } /* now eat some of those delicious buffer bits */ __archive_read_consume(a, eoh - buf); switch (ftyp) { case WT_RSRC: case WT_RSP: if (fnam.len > 0U) { /* populate entry object */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_copy_pathname(entry, fnam.str); archive_entry_set_size(entry, cntlen); archive_entry_set_perm(entry, 0644); /* rtime is the new ctime, mtime stays mtime */ archive_entry_set_ctime(entry, rtime, 0L); archive_entry_set_mtime(entry, mtime, 0L); break; } /* FALLTHROUGH */ default: /* consume the content and start over */ _warc_skip(a); goto start_over; } return (ARCHIVE_OK); } static int _warc_read(struct archive_read *a, const void **buf, size_t *bsz, int64_t *off) { struct warc_s *w = a->format->data; const char *rab; ssize_t nrd; if (w->cntoff >= w->cntlen) { eof: /* it's our lucky day, no work, we can leave early */ *buf = NULL; *bsz = 0U; *off = w->cntoff + 4U/*for \r\n\r\n separator*/; w->unconsumed = 0U; return (ARCHIVE_EOF); } + if (w->unconsumed) { + __archive_read_consume(a, w->unconsumed); + w->unconsumed = 0U; + } + rab = __archive_read_ahead(a, 1U, &nrd); if (nrd < 0) { *bsz = 0U; /* big catastrophe */ return (int)nrd; } else if (nrd == 0) { goto eof; } else if ((size_t)nrd > w->cntlen - w->cntoff) { /* clamp to content-length */ nrd = w->cntlen - w->cntoff; } *off = w->cntoff; *bsz = nrd; *buf = rab; w->cntoff += nrd; w->unconsumed = (size_t)nrd; return (ARCHIVE_OK); } static int _warc_skip(struct archive_read *a) { struct warc_s *w = a->format->data; __archive_read_consume(a, w->cntlen + 4U/*\r\n\r\n separator*/); w->cntlen = 0U; w->cntoff = 0U; return (ARCHIVE_OK); } /* private routines */ static void* deconst(const void *c) { return (char *)0x1 + (((const char *)c) - (const char *)0x1); } static char* xmemmem(const char *hay, const size_t haysize, const char *needle, const size_t needlesize) { const char *const eoh = hay + haysize; const char *const eon = needle + needlesize; const char *hp; const char *np; const char *cand; unsigned int hsum; unsigned int nsum; unsigned int eqp; /* trivial checks first * a 0-sized needle is defined to be found anywhere in haystack * then run strchr() to find a candidate in HAYSTACK (i.e. a portion * that happens to begin with *NEEDLE) */ if (needlesize == 0UL) { return deconst(hay); } else if ((hay = memchr(hay, *needle, haysize)) == NULL) { /* trivial */ return NULL; } /* First characters of haystack and needle are the same now. Both are * guaranteed to be at least one character long. Now computes the sum * of characters values of needle together with the sum of the first * needle_len characters of haystack. */ for (hp = hay + 1U, np = needle + 1U, hsum = *hay, nsum = *hay, eqp = 1U; hp < eoh && np < eon; hsum ^= *hp, nsum ^= *np, eqp &= *hp == *np, hp++, np++); /* HP now references the (NEEDLESIZE + 1)-th character. */ if (np < eon) { /* haystack is smaller than needle, :O */ return NULL; } else if (eqp) { /* found a match */ return deconst(hay); } /* now loop through the rest of haystack, * updating the sum iteratively */ for (cand = hay; hp < eoh; hp++) { hsum ^= *cand++; hsum ^= *hp; /* Since the sum of the characters is already known to be * equal at that point, it is enough to check just NEEDLESIZE - 1 * characters for equality, * also CAND is by design < HP, so no need for range checks */ if (hsum == nsum && memcmp(cand, needle, needlesize - 1U) == 0) { return deconst(cand); } } return NULL; } static int strtoi_lim(const char *str, const char **ep, int llim, int ulim) { int res = 0; const char *sp; /* we keep track of the number of digits via rulim */ int rulim; for (sp = str, rulim = ulim > 10 ? ulim : 10; res * 10 <= ulim && rulim && *sp >= '0' && *sp <= '9'; sp++, rulim /= 10) { res *= 10; res += *sp - '0'; } if (sp == str) { res = -1; } else if (res < llim || res > ulim) { res = -2; } *ep = (const char*)sp; return res; } static time_t time_from_tm(struct tm *t) { #if HAVE_TIMEGM /* Use platform timegm() if available. */ return (timegm(t)); #elif HAVE__MKGMTIME64 return (_mkgmtime64(t)); #else /* Else use direct calculation using POSIX assumptions. */ /* First, fix up tm_yday based on the year/month/day. */ if (mktime(t) == (time_t)-1) return ((time_t)-1); /* Then we can compute timegm() from first principles. */ return (t->tm_sec + t->tm_min * 60 + t->tm_hour * 3600 + t->tm_yday * 86400 + (t->tm_year - 70) * 31536000 + ((t->tm_year - 69) / 4) * 86400 - ((t->tm_year - 1) / 100) * 86400 + ((t->tm_year + 299) / 400) * 86400); #endif } static time_t xstrpisotime(const char *s, char **endptr) { /** like strptime() but strictly for ISO 8601 Zulu strings */ struct tm tm; time_t res = (time_t)-1; /* make sure tm is clean */ memset(&tm, 0, sizeof(tm)); /* as a courtesy to our callers, and since this is a non-standard * routine, we skip leading whitespace */ while (*s == ' ' || *s == '\t') ++s; /* read year */ if ((tm.tm_year = strtoi_lim(s, &s, 1583, 4095)) < 0 || *s++ != '-') { goto out; } /* read month */ if ((tm.tm_mon = strtoi_lim(s, &s, 1, 12)) < 0 || *s++ != '-') { goto out; } /* read day-of-month */ if ((tm.tm_mday = strtoi_lim(s, &s, 1, 31)) < 0 || *s++ != 'T') { goto out; } /* read hour */ if ((tm.tm_hour = strtoi_lim(s, &s, 0, 23)) < 0 || *s++ != ':') { goto out; } /* read minute */ if ((tm.tm_min = strtoi_lim(s, &s, 0, 59)) < 0 || *s++ != ':') { goto out; } /* read second */ if ((tm.tm_sec = strtoi_lim(s, &s, 0, 60)) < 0 || *s++ != 'Z') { goto out; } /* massage TM to fulfill some of POSIX' constraints */ tm.tm_year -= 1900; tm.tm_mon--; /* now convert our custom tm struct to a unix stamp using UTC */ res = time_from_tm(&tm); out: if (endptr != NULL) { *endptr = deconst(s); } return res; } static unsigned int _warc_rdver(const char *buf, size_t bsz) { static const char magic[] = "WARC/"; const char *c; unsigned int ver = 0U; unsigned int end = 0U; if (bsz < 12 || memcmp(buf, magic, sizeof(magic) - 1U) != 0) { /* buffer too small or invalid magic */ return ver; } /* looks good so far, read the version number for a laugh */ buf += sizeof(magic) - 1U; if (isdigit((unsigned char)buf[0U]) && (buf[1U] == '.') && isdigit((unsigned char)buf[2U])) { /* we support a maximum of 2 digits in the minor version */ if (isdigit((unsigned char)buf[3U])) end = 1U; /* set up major version */ ver = (buf[0U] - '0') * 10000U; /* set up minor version */ if (end == 1U) { ver += (buf[2U] - '0') * 1000U; ver += (buf[3U] - '0') * 100U; } else ver += (buf[2U] - '0') * 100U; /* * WARC below version 0.12 has a space-separated header * WARC 0.12 and above terminates the version with a CRLF */ c = buf + 3U + end; if (ver >= 1200U) { if (memcmp(c, "\r\n", 2U) != 0) ver = 0U; } else if (ver < 1200U) { if (*c != ' ' && *c != '\t') ver = 0U; } } return ver; } static unsigned int _warc_rdtyp(const char *buf, size_t bsz) { static const char _key[] = "\r\nWARC-Type:"; const char *val, *eol; if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) { /* no bother */ return WT_NONE; } val += sizeof(_key) - 1U; if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) { /* no end of line */ return WT_NONE; } /* overread whitespace */ while (val < eol && (*val == ' ' || *val == '\t')) ++val; if (val + 8U == eol) { if (memcmp(val, "resource", 8U) == 0) return WT_RSRC; else if (memcmp(val, "response", 8U) == 0) return WT_RSP; } return WT_NONE; } static warc_string_t _warc_rduri(const char *buf, size_t bsz) { static const char _key[] = "\r\nWARC-Target-URI:"; const char *val, *uri, *eol, *p; warc_string_t res = {0U, NULL}; if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) { /* no bother */ return res; } /* overread whitespace */ val += sizeof(_key) - 1U; if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) { /* no end of line */ return res; } while (val < eol && (*val == ' ' || *val == '\t')) ++val; /* overread URL designators */ if ((uri = xmemmem(val, eol - val, "://", 3U)) == NULL) { /* not touching that! */ return res; } /* spaces inside uri are not allowed, CRLF should follow */ for (p = val; p < eol; p++) { if (isspace((unsigned char)*p)) return res; } /* there must be at least space for ftp */ if (uri < (val + 3U)) return res; /* move uri to point to after :// */ uri += 3U; /* now then, inspect the URI */ if (memcmp(val, "file", 4U) == 0) { /* perfect, nothing left to do here */ } else if (memcmp(val, "http", 4U) == 0 || memcmp(val, "ftp", 3U) == 0) { /* overread domain, and the first / */ while (uri < eol && *uri++ != '/'); } else { /* not sure what to do? best to bugger off */ return res; } res.str = uri; res.len = eol - uri; return res; } static ssize_t _warc_rdlen(const char *buf, size_t bsz) { static const char _key[] = "\r\nContent-Length:"; const char *val, *eol; char *on = NULL; long int len; if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) { /* no bother */ return -1; } val += sizeof(_key) - 1U; if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) { /* no end of line */ return -1; } /* skip leading whitespace */ while (val < eol && (*val == ' ' || *val == '\t')) val++; /* there must be at least one digit */ if (!isdigit((unsigned char)*val)) return -1; len = strtol(val, &on, 10); if (on != eol) { /* line must end here */ return -1; } return (size_t)len; } static time_t _warc_rdrtm(const char *buf, size_t bsz) { static const char _key[] = "\r\nWARC-Date:"; const char *val, *eol; char *on = NULL; time_t res; if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) { /* no bother */ return (time_t)-1; } val += sizeof(_key) - 1U; if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL ) { /* no end of line */ return -1; } /* xstrpisotime() kindly overreads whitespace for us, so use that */ res = xstrpisotime(val, &on); if (on != eol) { /* line must end here */ return -1; } return res; } static time_t _warc_rdmtm(const char *buf, size_t bsz) { static const char _key[] = "\r\nLast-Modified:"; const char *val, *eol; char *on = NULL; time_t res; if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) { /* no bother */ return (time_t)-1; } val += sizeof(_key) - 1U; if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL ) { /* no end of line */ return -1; } /* xstrpisotime() kindly overreads whitespace for us, so use that */ res = xstrpisotime(val, &on); if (on != eol) { /* line must end here */ return -1; } return res; } static const char* _warc_find_eoh(const char *buf, size_t bsz) { static const char _marker[] = "\r\n\r\n"; const char *hit = xmemmem(buf, bsz, _marker, sizeof(_marker) - 1U); if (hit != NULL) { hit += sizeof(_marker) - 1U; } return hit; } static const char* _warc_find_eol(const char *buf, size_t bsz) { static const char _marker[] = "\r\n"; const char *hit = xmemmem(buf, bsz, _marker, sizeof(_marker) - 1U); return hit; } /* archive_read_support_format_warc.c ends here */ Index: head/contrib/libarchive/libarchive/test/test_read_format_rar5.c =================================================================== --- head/contrib/libarchive/libarchive/test/test_read_format_rar5.c (revision 342041) +++ head/contrib/libarchive/libarchive/test/test_read_format_rar5.c (revision 342042) @@ -1,769 +1,770 @@ /*- * Copyright (c) 2018 Grzegorz Antoniak * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "test.h" /* Some tests will want to calculate some CRC32's, and this header can * help. */ #define __LIBARCHIVE_BUILD #include +#include #define PROLOGUE(reffile) \ struct archive_entry *ae; \ struct archive *a; \ \ (void) a; /* Make the compiler happy if we won't use this variables */ \ (void) ae; /* in the test cases. */ \ \ extract_reference_file(reffile); \ assert((a = archive_read_new()) != NULL); \ assertA(0 == archive_read_support_filter_all(a)); \ assertA(0 == archive_read_support_format_all(a)); \ assertA(0 == archive_read_open_filename(a, reffile, 10240)) #define PROLOGUE_MULTI(reffile) \ struct archive_entry *ae; \ struct archive *a; \ \ (void) a; \ (void) ae; \ \ extract_reference_files(reffile); \ assert((a = archive_read_new()) != NULL); \ assertA(0 == archive_read_support_filter_all(a)); \ assertA(0 == archive_read_support_format_all(a)); \ assertA(0 == archive_read_open_filenames(a, reffile, 10240)) #define EPILOGUE() \ assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); \ assertEqualInt(ARCHIVE_OK, archive_read_free(a)) static int verify_data(const uint8_t* data_ptr, int magic, int size) { int i = 0; /* This is how the test data inside test files was generated; * we are re-generating it here and we check if our re-generated * test data is the same as in the test file. If this test is * failing it's either because there's a bug in the test case, * or the unpacked data is corrupted. */ for(i = 0; i < size / 4; ++i) { const int k = i + 1; const signed int* lptr = (const signed int*) &data_ptr[i * 4]; signed int val = k * k - 3 * k + (1 + magic); if(val < 0) val = 0; /* *lptr is a value inside unpacked test file, val is the * value that should be in the unpacked test file. */ - if(*lptr != val) + if(archive_le32dec(lptr) != (uint32_t) val) return 0; } return 1; } static int extract_one(struct archive* a, struct archive_entry* ae, uint32_t crc) { la_ssize_t fsize, bytes_read; uint8_t* buf; int ret = 1; uint32_t computed_crc; fsize = archive_entry_size(ae); buf = malloc(fsize); if(buf == NULL) return 1; bytes_read = archive_read_data(a, buf, fsize); if(bytes_read != fsize) { assertEqualInt(bytes_read, fsize); goto fn_exit; } computed_crc = crc32(0, buf, fsize); assertEqualInt(computed_crc, crc); ret = 0; fn_exit: free(buf); return ret; } DEFINE_TEST(test_read_format_rar5_stored) { const char helloworld_txt[] = "hello libarchive test suite!\n"; la_ssize_t file_size = sizeof(helloworld_txt) - 1; char buff[64]; PROLOGUE("test_read_format_rar5_stored.rar"); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("helloworld.txt", archive_entry_pathname(ae)); assertA((int) archive_entry_mtime(ae) > 0); assertA((int) archive_entry_ctime(ae) == 0); assertA((int) archive_entry_atime(ae) == 0); assertEqualInt(file_size, archive_entry_size(ae)); assertEqualInt(33188, archive_entry_mode(ae)); assertA(file_size == archive_read_data(a, buff, file_size)); assertEqualMem(buff, helloworld_txt, file_size); assertEqualInt(archive_entry_is_encrypted(ae), 0); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_compressed) { const int DATA_SIZE = 1200; uint8_t buff[DATA_SIZE]; PROLOGUE("test_read_format_rar5_compressed.rar"); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA((int) archive_entry_mtime(ae) > 0); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); verify_data(buff, 0, DATA_SIZE); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_multiple_files) { const int DATA_SIZE = 4096; uint8_t buff[DATA_SIZE]; PROLOGUE("test_read_format_rar5_multiple_files.rar"); /* There should be 4 files inside this test file. Check for their * existence, and also check the contents of those test files. */ assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(verify_data(buff, 1, DATA_SIZE)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(verify_data(buff, 2, DATA_SIZE)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(verify_data(buff, 3, DATA_SIZE)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(verify_data(buff, 4, DATA_SIZE)); /* There should be no more files in this archive. */ assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } /* This test is really the same as the test above, but it deals with a solid * archive instead of a regular archive. The test solid archive contains the * same set of files as regular test archive, but it's size is 2x smaller, * because solid archives reuse the window buffer from previous compressed * files, so it's able to compress lots of small files more effectively. */ DEFINE_TEST(test_read_format_rar5_multiple_files_solid) { const int DATA_SIZE = 4096; uint8_t buff[DATA_SIZE]; PROLOGUE("test_read_format_rar5_multiple_files_solid.rar"); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(verify_data(buff, 1, DATA_SIZE)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(verify_data(buff, 2, DATA_SIZE)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(verify_data(buff, 3, DATA_SIZE)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertEqualInt(DATA_SIZE, archive_entry_size(ae)); assertA(DATA_SIZE == archive_read_data(a, buff, DATA_SIZE)); assertA(verify_data(buff, 4, DATA_SIZE)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_multiarchive_skip_all) { const char* reffiles[] = { "test_read_format_rar5_multiarchive.part01.rar", "test_read_format_rar5_multiarchive.part02.rar", "test_read_format_rar5_multiarchive.part03.rar", "test_read_format_rar5_multiarchive.part04.rar", "test_read_format_rar5_multiarchive.part05.rar", "test_read_format_rar5_multiarchive.part06.rar", "test_read_format_rar5_multiarchive.part07.rar", "test_read_format_rar5_multiarchive.part08.rar", NULL }; PROLOGUE_MULTI(reffiles); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("home/antek/temp/build/unrar5/libarchive/bin/bsdcat_test", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("home/antek/temp/build/unrar5/libarchive/bin/bsdtar_test", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_multiarchive_skip_all_but_first) { const char* reffiles[] = { "test_read_format_rar5_multiarchive.part01.rar", "test_read_format_rar5_multiarchive.part02.rar", "test_read_format_rar5_multiarchive.part03.rar", "test_read_format_rar5_multiarchive.part04.rar", "test_read_format_rar5_multiarchive.part05.rar", "test_read_format_rar5_multiarchive.part06.rar", "test_read_format_rar5_multiarchive.part07.rar", "test_read_format_rar5_multiarchive.part08.rar", NULL }; PROLOGUE_MULTI(reffiles); assertA(0 == archive_read_next_header(a, &ae)); assertA(0 == extract_one(a, ae, 0x35277473)); assertA(0 == archive_read_next_header(a, &ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_multiarchive_skip_all_but_second) { const char* reffiles[] = { "test_read_format_rar5_multiarchive.part01.rar", "test_read_format_rar5_multiarchive.part02.rar", "test_read_format_rar5_multiarchive.part03.rar", "test_read_format_rar5_multiarchive.part04.rar", "test_read_format_rar5_multiarchive.part05.rar", "test_read_format_rar5_multiarchive.part06.rar", "test_read_format_rar5_multiarchive.part07.rar", "test_read_format_rar5_multiarchive.part08.rar", NULL }; PROLOGUE_MULTI(reffiles); assertA(0 == archive_read_next_header(a, &ae)); assertA(0 == archive_read_next_header(a, &ae)); assertA(0 == extract_one(a, ae, 0xE59665F8)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_blake2) { const la_ssize_t proper_size = 814; uint8_t buf[proper_size]; PROLOGUE("test_read_format_rar5_blake2.rar"); assertA(0 == archive_read_next_header(a, &ae)); assertEqualInt(proper_size, archive_entry_size(ae)); /* Should blake2 calculation fail, we'll get a failure return * value from archive_read_data(). */ assertA(proper_size == archive_read_data(a, buf, proper_size)); /* To be extra pedantic, let's also check crc32 of the poem. */ assertEqualInt(crc32(0, buf, proper_size), 0x7E5EC49E); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_arm_filter) { /* This test unpacks a file that uses an ARM filter. The DELTA * and X86 filters are tested implicitly in the "multiarchive_skip" * test. */ const la_ssize_t proper_size = 90808; uint8_t buf[proper_size]; PROLOGUE("test_read_format_rar5_arm.rar"); assertA(0 == archive_read_next_header(a, &ae)); assertEqualInt(proper_size, archive_entry_size(ae)); assertA(proper_size == archive_read_data(a, buf, proper_size)); /* Yes, RARv5 unpacker itself should calculate the CRC, but in case * the DONT_FAIL_ON_CRC_ERROR define option is enabled during compilation, * let's still fail the test if the unpacked data is wrong. */ assertEqualInt(crc32(0, buf, proper_size), 0x886F91EB); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_stored_skip_all) { const char* fname = "test_read_format_rar5_stored_manyfiles.rar"; PROLOGUE(fname); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("make_uue.tcl", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_stored_skip_in_part) { const char* fname = "test_read_format_rar5_stored_manyfiles.rar"; char buf[6]; /* Skip first, extract in part rest. */ PROLOGUE(fname); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("make_uue.tcl", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(6 == archive_read_data(a, buf, 6)); assertEqualInt(0, memcmp(buf, "Cebula", 6)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(4 == archive_read_data(a, buf, 4)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_stored_skip_all_but_first) { const char* fname = "test_read_format_rar5_stored_manyfiles.rar"; char buf[405]; /* Extract first, skip rest. */ PROLOGUE(fname); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("make_uue.tcl", archive_entry_pathname(ae)); assertA(405 == archive_read_data(a, buf, sizeof(buf))); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_stored_skip_all_in_part) { const char* fname = "test_read_format_rar5_stored_manyfiles.rar"; char buf[4]; /* Extract in part all */ PROLOGUE(fname); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("make_uue.tcl", archive_entry_pathname(ae)); assertA(4 == archive_read_data(a, buf, 4)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(4 == archive_read_data(a, buf, 4)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(4 == archive_read_data(a, buf, 4)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_multiarchive_solid_skip_all) { const char* reffiles[] = { "test_read_format_rar5_multiarchive_solid.part01.rar", "test_read_format_rar5_multiarchive_solid.part02.rar", "test_read_format_rar5_multiarchive_solid.part03.rar", "test_read_format_rar5_multiarchive_solid.part04.rar", NULL }; PROLOGUE_MULTI(reffiles); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("elf-Linux-ARMv7-ls", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_multiarchive_solid_skip_all_but_first) { const char* reffiles[] = { "test_read_format_rar5_multiarchive_solid.part01.rar", "test_read_format_rar5_multiarchive_solid.part02.rar", "test_read_format_rar5_multiarchive_solid.part03.rar", "test_read_format_rar5_multiarchive_solid.part04.rar", NULL }; PROLOGUE_MULTI(reffiles); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x7E5EC49E)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("elf-Linux-ARMv7-ls", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } /* "skip_all_but_scnd" -> am I hitting the test name limit here after * expansion of "scnd" to "second"? */ DEFINE_TEST(test_read_format_rar5_multiarchive_solid_skip_all_but_scnd) { const char* reffiles[] = { "test_read_format_rar5_multiarchive_solid.part01.rar", "test_read_format_rar5_multiarchive_solid.part02.rar", "test_read_format_rar5_multiarchive_solid.part03.rar", "test_read_format_rar5_multiarchive_solid.part04.rar", NULL }; PROLOGUE_MULTI(reffiles); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x7CCA70CD)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("elf-Linux-ARMv7-ls", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_multiarchive_solid_skip_all_but_third) { const char* reffiles[] = { "test_read_format_rar5_multiarchive_solid.part01.rar", "test_read_format_rar5_multiarchive_solid.part02.rar", "test_read_format_rar5_multiarchive_solid.part03.rar", "test_read_format_rar5_multiarchive_solid.part04.rar", NULL }; PROLOGUE_MULTI(reffiles); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x7E13B2C6)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("elf-Linux-ARMv7-ls", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_multiarchive_solid_skip_all_but_last) { const char* reffiles[] = { "test_read_format_rar5_multiarchive_solid.part01.rar", "test_read_format_rar5_multiarchive_solid.part02.rar", "test_read_format_rar5_multiarchive_solid.part03.rar", "test_read_format_rar5_multiarchive_solid.part04.rar", NULL }; PROLOGUE_MULTI(reffiles); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("cebula.txt", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("elf-Linux-ARMv7-ls", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x886F91EB)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_solid_skip_all) { const char* reffile = "test_read_format_rar5_solid.rar"; /* Skip all */ PROLOGUE(reffile); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_solid_skip_all_but_first) { const char* reffile = "test_read_format_rar5_solid.rar"; /* Extract first, skip rest */ PROLOGUE(reffile); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x7CCA70CD)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_solid_skip_all_but_second) { const char* reffile = "test_read_format_rar5_solid.rar"; /* Skip first, extract second, skip rest */ PROLOGUE(reffile); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x7E13B2C6)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_solid_skip_all_but_last) { const char* reffile = "test_read_format_rar5_solid.rar"; /* Skip all but last, extract last */ PROLOGUE(reffile); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x36A448FF)); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_extract_win32) { PROLOGUE("test_read_format_rar5_win32.rar"); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x7CCA70CD)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test1.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x7E13B2C6)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test2.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0xF166AFCB)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test3.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x9FB123D9)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test4.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x10C43ED4)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test5.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0xB9D155F2)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test6.bin", archive_entry_pathname(ae)); assertA(0 == extract_one(a, ae, 0x36A448FF)); EPILOGUE(); } DEFINE_TEST(test_read_format_rar5_block_by_block) { /* This test uses strange buffer sizes intentionally. */ struct archive_entry *ae; struct archive *a; uint8_t buf[173]; int bytes_read; uint32_t computed_crc = 0; extract_reference_file("test_read_format_rar5_compressed.rar"); assert((a = archive_read_new()) != NULL); assertA(0 == archive_read_support_filter_all(a)); assertA(0 == archive_read_support_format_all(a)); assertA(0 == archive_read_open_filename(a, "test_read_format_rar5_compressed.rar", 130)); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("test.bin", archive_entry_pathname(ae)); assertEqualInt(1200, archive_entry_size(ae)); /* File size is 1200 bytes, we're reading it using a buffer of 173 bytes. * Libarchive is configured to use a buffer of 130 bytes. */ while(1) { /* archive_read_data should return one of: * a) 0, if there is no more data to be read, * b) negative value, if there was an error, * c) positive value, meaning how many bytes were read. */ bytes_read = archive_read_data(a, buf, sizeof(buf)); assertA(bytes_read >= 0); if(bytes_read <= 0) break; computed_crc = crc32(computed_crc, buf, bytes_read); } assertEqualInt(computed_crc, 0x7CCA70CD); EPILOGUE(); } Index: head/contrib/libarchive =================================================================== --- head/contrib/libarchive (revision 342041) +++ head/contrib/libarchive (revision 342042) Property changes on: head/contrib/libarchive ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/libarchive/dist:r341771,342040-342041