diff --git a/ChangeLog b/ChangeLog index f2c2bd3cad3f..c068ad3e3901 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8002 +1,8654 @@ +commit 18b845e69752c975dfeda418ec00eda22605c2ee +Author: Lasse Collin +Date: 2023-01-11 18:52:54 +0200 + + Bump version and soname for 5.4.1. + + src/liblzma/Makefile.am | 2 +- + src/liblzma/api/lzma/version.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +commit 4080bbb844fb36701ffb978f0c41ea2c2c9f8960 +Author: Jia Tan +Date: 2023-01-11 23:58:16 +0800 + + Add NEWS for 5.4.1. + + NEWS | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 70 insertions(+) + +commit 674c89fdb8c457ebc3a0652e235d8b6cd7b7dee2 +Author: Lasse Collin +Date: 2023-01-10 11:56:11 +0200 + + sysdefs.h: Don't include strings.h anymore. + + On some platforms src/xz/suffix.c may need <strings.h> for + strcasecmp() but suffix.c includes the header when it needs it. + + Unless there is an old system that otherwise supports enough C99 + to build XZ Utils but doesn't have C89/C90-compatible <string.h>, + there should be no need to include <strings.h> in sysdefs.h. + + src/common/sysdefs.h | 6 ------ + 1 file changed, 6 deletions(-) + +commit 2a6b938084fac9ddb39cd69c9beeed15c3b6f6f6 +Author: Lasse Collin +Date: 2023-01-10 11:23:41 +0200 + + xz: Include <strings.h> in suffix.c if needed for strcasecmp(). + + SUSv2 and POSIX.1‐2017 declare only a few functions in <strings.h>. + Of these, strcasecmp() is used on some platforms in suffix.c. + Nothing else in the project needs <strings.h> (at least if + building on a modern system). + + sysdefs.h currently includes <strings.h> if HAVE_STRINGS_H is + defined and suffix.c relied on this. + + Note that dos/config.h doesn't #define HAVE_STRINGS_H even though + DJGPP does have strings.h. It isn't needed with DJGPP as strcasecmp() + is also in <string.h> in DJGPP.
+ + src/xz/suffix.c | 3 +++ + 1 file changed, 3 insertions(+) + +commit aea639e81beb548e3114c74b6d9a894d6e036189 +Author: Jia Tan +Date: 2023-01-11 22:46:48 +0800 + + xz: Fix warning -Wformat-nonliteral on clang in message.c. + + clang and gcc differ in how they handle -Wformat-nonliteral. gcc will + allow a non-literal format string as long as the function takes its + format arguments as a va_list. + + src/xz/message.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +commit e3b42bfcb0f67988beee7c7022fed0361282be45 +Author: Jia Tan +Date: 2023-01-11 20:58:31 +0800 + + Tests: Fix test_filter_flags copy/paste error. + + tests/test_filter_flags.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit 21625b7e11d004788e40eb5eb88d9d89f65fe347 +Author: Jia Tan +Date: 2023-01-11 20:42:29 +0800 + + Tests: Fix type-limits warning in test_filter_flags. + + This only occurs in test_filter_flags when the BCJ filters are not + configured and built. In this case, ARRAY_SIZE() returns 0 and causes a + type-limits warning with the loop variable since an unsigned number will + always be >= 0. + + tests/test_filter_flags.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +commit c337983e928682d56ce3470b286a8d5b8646e0ad +Author: Lasse Collin +Date: 2023-01-10 22:14:03 +0200 + + liblzma: CLMUL CRC64: Work around a bug in MSVC, second attempt. + + This affects only 32-bit x86 builds. x86-64 is OK as is. + + I still cannot easily test this myself. The reporter has tested + this and it passes the tests included in the CMake build and + performance is good: raw CRC64 is 2-3 times faster than the + C version of the slice-by-four method. (Note that liblzma doesn't + include a MSVC-compatible version of the 32-bit x86 assembly code + for the slice-by-four method.) + + Thanks to Iouri Kharon for figuring out a fix, testing, and + benchmarking. 
+ + src/liblzma/check/crc64_fast.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +commit b7fb438ea0e3ee02e3a164f3b72fae456cbe34d7 +Author: Jia Tan +Date: 2023-01-11 01:18:50 +0800 + + Tests: Fix unused function warning in test_block_header. + + One of the global arrays of filters was only used in a test that + required both encoders and decoders to be configured in the build. + + tests/test_block_header.c | 4 ++++ + 1 file changed, 4 insertions(+) + +commit 68e9ef036d18d7c3952bff0b391d5989b86934da +Author: Jia Tan +Date: 2023-01-11 01:08:03 +0800 + + Tests: Fix unused function warning in test_index_hash. + + test_index_hash does not use fill_index_hash() unless both encoders + and decoders are configured in the build. + + tests/test_index_hash.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +commit a387707cd8cdefbffb5b7429bda4b7fcc192954a +Author: Lasse Collin +Date: 2023-01-10 00:33:14 +0200 + + Windows: Update INSTALL-MSVC.txt to recommend CMake over project files. + + windows/INSTALL-MSVC.txt | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +commit 52902ad69518255a14b0144f0a2379e06fde5b6e +Author: Lasse Collin +Date: 2023-01-10 12:47:16 +0200 + + Revert "liblzma: CLMUL CRC64: Workaround a bug in MSVC (VS2015-2022)." + + This reverts commit 36edc65ab4cf10a131f239acbd423b4510ba52d5. + + It was reported that it wasn't a good enough fix and MSVC + still produced (different kind of) bad code when building + for 32-bit x86 if optimizations are enabled. + + Thanks to Iouri Kharon. + + src/liblzma/check/crc64_fast.c | 6 ------ + 1 file changed, 6 deletions(-) + +commit e81b9fc48ca70f9228308d3f1871cd81f9a5a496 +Author: Lasse Collin +Date: 2023-01-10 10:05:13 +0200 + + sysdefs.h: Fix a comment. 
+ + src/common/sysdefs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 6e89ab58b031aa046308a0b3504ff0a5be042571 +Author: Lasse Collin +Date: 2023-01-10 10:04:06 +0200 + + sysdefs.h: Don't include memory.h anymore even if it were available. + + It quite probably was never needed, that is, any system where memory.h + was required likely couldn't compile XZ Utils for other reasons anyway. + + XZ Utils 5.2.6 and later source packages were generated using + Autoconf 2.71 which no longer defines HAVE_MEMORY_H. So the code + being removed is no longer used anyway. + + src/common/sysdefs.h | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +commit 65c59ad429aa59f9df0326d9fc82931ba4a9d123 +Author: Lasse Collin +Date: 2023-01-10 08:50:26 +0200 + + CMake/Windows: Add a workaround for windres from GNU binutils. + + This is combined from the following commits in the master branch: + 443dfebced041adc88f10d824188eeef5b5821a9 + 6b117d3b1fe91eb26d533ab16a2e552f84148d47 + 5e34774c31d1b7509b5cb77a3be9973adec59ea0 + + Thanks to Iouri Kharon for the bug report, the original patch, + and testing. + + CMakeLists.txt | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +commit 43521e77acc907863fa4f94aae276366172cb9ee +Author: Lasse Collin +Date: 2023-01-06 22:53:38 +0200 + + Tests: test_filter_flags: Clean up minor issues. + + Here are the list of the most significant issues addressed: + - Avoid using internal common.h header. It's not good to copy the + constants like this but common.h cannot be included for use outside + of liblzma. This is the quickest thing to do that could be fixed later. + + - Omit the INIT_FILTER macro. Initialization should be done with just + regular designated initializers. + + - Use start_offset = 257 for BCJ tests. It demonstrates that Filter + Flags encoder and decoder don't validate the options thoroughly. + 257 is valid only for the x86 filter. 
This is a bit silly but + not a significant problem in practice because the encoder and + decoder initialization functions will catch bad alignment still. + Perhaps this should be fixed but it's not urgent and doesn't need + to be in 5.4.x. + + - Various tweaks to comments such as filter id -> Filter ID + + tests/test_filter_flags.c | 153 +++++++++++++++++++++++----------------------- + 1 file changed, 78 insertions(+), 75 deletions(-) + +commit 6b44cead95d767414272dc3a67898a36bfdf95b3 +Author: Jia Tan +Date: 2022-12-29 23:33:33 +0800 + + Tests: Refactors existing filter flags tests. + + Converts the existing filter flags tests into tuktests. + + tests/test_filter_flags.c | 655 ++++++++++++++++++++++++++++++++-------------- + 1 file changed, 457 insertions(+), 198 deletions(-) + +commit 1bbefa9659b202ba31bd244a9d0e4f0d37ff3ed7 +Author: Lasse Collin +Date: 2023-01-08 00:32:29 +0200 + + Tests: tuktest.h: Support tuktest_malloc(0). + + It's not needed in XZ Utils at least for now. It's good to support + it still because if such use is needed later, it wouldn't be + caught on GNU/Linux since malloc(0) from glibc returns non-NULL. + + tests/tuktest.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit ce3a3fbc7c2c399aeed644d54f3bd56ac914dfee +Author: Lasse Collin +Date: 2023-01-07 21:57:11 +0200 + + CMake: Update cmake_minimum_required from 3.13...3.16 to 3.13...3.25. + + The changes listed on cmake-policies(7) for versions 3.17 to 3.25 + shouldn't affect this project. + + CMakeLists.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 99fcd57f2ea35eaa94e09f674d5364329c880fa2 +Author: Lasse Collin +Date: 2023-01-08 00:24:23 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit c0c13d9d82eb8a4302c8bbb8b4c5178d285fe9ab +Author: Lasse Collin +Date: 2023-01-07 19:50:35 +0200 + + Update THANKS. 
+ + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 3d45987451b1c3bb42697b29341824c0e5484cba +Author: Lasse Collin +Date: 2023-01-09 11:27:24 +0200 + + CMake: Fix a copypaste error in xzdec Windows resource file handling. + + It was my mistake. Thanks to Iouri Kharon for the bug report. + + CMakeLists.txt | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit 706bce5018d7cf83094e13454a0731169ec119b5 +Author: Lasse Collin +Date: 2023-01-07 19:50:03 +0200 + + CMake/Windows: Add resource files to xz.exe and xzdec.exe. + + The command line tools cannot be built with MSVC for now but + they can be built with MinGW-w64. + + Thanks to Iouri Kharon for the bug report and the original patch. + + CMakeLists.txt | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +commit e96dee55df04113c33b387ccdb6cb70935422d91 +Author: Lasse Collin +Date: 2023-01-09 12:22:05 +0200 + + liblzma: CLMUL CRC64: Workaround a bug in MSVC (VS2015-2022). + + I haven't tested with MSVC myself and there doesn't seem to be + information about the problem online, so I'm relying on the bug report. + + Thanks to Iouri Kharon for the bug report and the patch. + + src/liblzma/check/crc64_fast.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +commit 52bc1ee34dda9bb6fb40175e5952863066681b77 +Author: Lasse Collin +Date: 2023-01-07 19:31:15 +0200 + + Build: Require that _mm_set_epi64x() is usable to enable CLMUL support. + + VS2013 doesn't have _mm_set_epi64x() so this way CLMUL gets + disabled with VS2013. + + Thanks to Iouri Kharon for the bug report. + + CMakeLists.txt | 3 ++- + configure.ac | 8 ++++++-- + 2 files changed, 8 insertions(+), 3 deletions(-) + +commit bad44cfe19e8be8ae76755369be2a34abcd2b4fa +Author: Jia Tan +Date: 2022-12-29 00:25:18 +0800 + + Tests: Creates test_index_hash.c + + Tests all API functions exported from index_hash.h. Does not have a + dedicated test for lzma_index_hash_end. + + [Minor edits were made by Lasse Collin.] 
+ + .gitignore | 1 + + CMakeLists.txt | 2 + + tests/Makefile.am | 3 + + tests/test_index_hash.c | 388 ++++++++++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 394 insertions(+) + +commit 692ccdf5516dfe55fb6e9c5cdfb31f4c02c1ecd1 +Author: Jia Tan +Date: 2023-01-05 20:57:25 +0800 + + liblzma: Remove common.h include from common/index.h. + + common/index.h is needed by liblzma internally and tests. common.h will + include and define many things that are not needed by the tests. + + Also, this prevents include order problems because both common.h and + lzma.h define LZMA_API. On most platforms it results only in a warning + but on Windows it would break the build as the definition in common.h + must be used only for building liblzma itself. + + src/liblzma/common/index.c | 1 + + src/liblzma/common/index.h | 9 +++++++-- + src/liblzma/common/index_decoder.h | 1 + + src/liblzma/common/stream_buffer_encoder.c | 1 + + 4 files changed, 10 insertions(+), 2 deletions(-) + +commit 2ac7bafc8f07c1edefe96a4a7a040ddfff0eb5bb +Author: Jia Tan +Date: 2022-08-17 20:20:16 +0800 + + liblzma: Add NULL check to lzma_index_hash_append. + + This is for consistency with lzma_index_append. + + src/liblzma/common/index_hash.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit db714d30e0c74d1dd4af1a23ed62b44e0e8e4efc +Author: Jia Tan +Date: 2022-08-17 17:59:51 +0800 + + liblzma: Replaced hardcoded 0x0 index indicator byte with macro + + src/liblzma/common/index.h | 3 +++ + src/liblzma/common/index_decoder.c | 2 +- + src/liblzma/common/index_encoder.c | 2 +- + src/liblzma/common/index_hash.c | 2 +- + src/liblzma/common/stream_decoder.c | 3 ++- + src/liblzma/common/stream_decoder_mt.c | 2 +- + 6 files changed, 9 insertions(+), 5 deletions(-) + +commit 39d2585dcd3e827cfc3c46025ab6708c4aeb36c6 +Author: Jia Tan +Date: 2023-01-06 20:43:31 +0800 + + Style: Change #if !defined() to #ifndef in mythread.h. 
+ + src/common/mythread.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 3f0130aa288e4ed57ace609517db9700a41223af +Author: Jia Tan +Date: 2023-01-06 20:35:55 +0800 + + Build: Add missing stream_decoder_mt.c to .vcxproj files. + + The line in the .vcxproj files for building stream_decoder_mt.c was missing in 5.4.0. + Thanks to Hajin Jang for reporting the issue. + + windows/vs2013/liblzma.vcxproj | 1 + + windows/vs2013/liblzma_dll.vcxproj | 1 + + windows/vs2017/liblzma.vcxproj | 1 + + windows/vs2017/liblzma_dll.vcxproj | 1 + + windows/vs2019/liblzma.vcxproj | 1 + + windows/vs2019/liblzma_dll.vcxproj | 1 + + 6 files changed, 6 insertions(+) + +commit f204d1050a515d17851eed9049862ce5a9c729c1 +Author: Lasse Collin +Date: 2023-01-04 22:40:54 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 34a9c2d650d6c30bd88e1b21910dd863209aa884 +Author: Lasse Collin +Date: 2023-01-04 18:40:28 +0200 + + Tests: Adjust style in test_compress.sh. + + tests/test_compress.sh | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +commit 761c208d58e0c3daa0f46e68b406adfc318d2a46 +Author: Jia Tan +Date: 2023-01-04 23:58:58 +0800 + + Tests: Replace non portable shell parameter expansion + + The shell parameter expansion using # and ## is not supported in + Solaris 10 Bourne shell (/bin/sh). Even though this is POSIX, it is not fully + portable, so we should avoid it. + + tests/create_compress_files.c | 2 +- + tests/test_compress.sh | 20 +++++++++++++------- + tests/test_compress_prepared_bcj_sparc | 2 +- + tests/test_compress_prepared_bcj_x86 | 2 +- + 4 files changed, 16 insertions(+), 10 deletions(-) + +commit 8a7cbc074547e55e57f4f3696f69bedeb05e14c4 +Author: Jia Tan +Date: 2023-01-03 21:02:38 +0800 + + Translations: Add Korean translation of man pages.
+ + Thanks to Seong-ho Cho + + po4a/ko.po | 5552 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + po4a/po4a.conf | 2 +- + 2 files changed, 5553 insertions(+), 1 deletion(-) + +commit ca2af49bb8be5995eb0e6a3abf457622626d49a7 +Author: Jia Tan +Date: 2023-01-03 20:47:27 +0800 + + Translations: Update the Esperanto translation. + + po/eo.po | 620 ++++++++++++++++++++++++++++++++++----------------------------- + 1 file changed, 332 insertions(+), 288 deletions(-) + +commit bfba3394aed03311fe9a746d3141b2e16d8b9325 +Author: Lasse Collin +Date: 2023-01-02 17:05:07 +0200 + + Build: Fix config.h comments. + + configure.ac | 2 +- + m4/tuklib_progname.m4 | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +commit 507648ad114c2ae0cd6d181063e1ac07e8106718 +Author: Jia Tan +Date: 2023-01-02 22:33:48 +0800 + + Build: Only define HAVE_PROGRAM_INVOCATION_NAME if it is set to 1. + + HAVE_DECL_PROGRAM_INVOCATION_NAME is renamed to + HAVE_PROGRAM_INVOCATION_NAME. Previously, + HAVE_DECL_PROGRAM_INVOCATION_NAME was always set when + building with autotools. CMake would only set this when it was 1, and the + dos/config.h did not define it. The new macro definition is consistent + across build systems. + + cmake/tuklib_progname.cmake | 5 ++--- + m4/tuklib_progname.m4 | 5 ++++- + src/common/tuklib_progname.c | 2 +- + src/common/tuklib_progname.h | 2 +- + 4 files changed, 8 insertions(+), 6 deletions(-) + +commit ab5229d32adfec1f3fbc95228d9dd6f560732ab5 +Author: Lasse Collin +Date: 2022-12-30 20:10:08 +0200 + + Tests: test_check: Test corner cases of CLMUL CRC64. + + tests/test_check.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +commit 8791826f31733fda0a13b411c2ed930faaeb25aa +Author: Lasse Collin +Date: 2022-12-30 19:36:49 +0200 + + Tests: Clarify a comment in test_lzip_decoder.c. 
+ + tests/test_lzip_decoder.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +commit c410d812ea12bfc74f6b727c1a799478c79f19ca +Author: Jia Tan +Date: 2022-12-29 01:55:19 +0800 + + xz: Includes <time.h> and <sys/time.h> conditionally in mytime.c. + + Previously, mytime.c depended on mythread.h for <time.h> to be included. + + src/xz/mytime.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +commit 501c6013d4a59fae5d4368e9657c4885493db809 +Author: Jia Tan +Date: 2022-12-29 01:15:27 +0800 + + liblzma: Includes sys/time.h conditionally in mythread + + Previously, <sys/time.h> was always included, even if mythread only used + clock_gettime. <time.h> is still needed even if clock_gettime is not used + though because struct timespec is needed for mythread_condtime. + + src/common/mythread.h | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +commit 9e3cb514b5b95bd235dcdff3db4436f57444ee4f +Author: Jia Tan +Date: 2022-12-29 01:10:53 +0800 + + Build: No longer require HAVE_DECL_CLOCK_MONOTONIC to always be set. + + Previously, if threading was enabled HAVE_DECL_CLOCK_MONOTONIC would always + be set to 0 or 1. However, this macro was needed in xz so if xz was not + built with threading and HAVE_DECL_CLOCK_MONOTONIC was not defined but + HAVE_CLOCK_GETTIME was, it caused a warning during build. Now, + HAVE_DECL_CLOCK_MONOTONIC has been renamed to HAVE_CLOCK_MONOTONIC and + will only be set if it is 1. + + CMakeLists.txt | 8 +++----- + configure.ac | 5 ++++- + src/common/mythread.h | 4 ++-- + src/xz/mytime.c | 5 ++--- + 4 files changed, 11 insertions(+), 11 deletions(-) + +commit 6fc3e5467911572fa9af4021ea46396261aae796 +Author: Jia Tan +Date: 2022-12-28 01:14:07 +0800 + + Translations: Add Ukrainian translations of man pages.
+ + Thanks to Yuri Chornoivan + + po4a/po4a.conf | 2 +- + po4a/uk.po | 3676 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 3677 insertions(+), 1 deletion(-) + +commit e84f2ab7f8bc38cd8f8befa0bb398656c3c11f8e +Author: Jia Tan +Date: 2022-12-22 23:14:53 +0800 + + liblzma: Update documentation for lzma_filter_encoder. + + src/liblzma/common/filter_encoder.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +commit b14b8dbba9a3b232787ae218b46430b9246383dd +Author: Jia Tan +Date: 2022-12-21 21:12:03 +0800 + + Tests: Adds lzip decoder tests + + .gitignore | 1 + + tests/Makefile.am | 2 + + tests/test_lzip_decoder.c | 471 ++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 474 insertions(+) + +commit 09a114805e1d4f9a02a06cee7dbf2f5014d1f710 +Author: Jia Cheong Tan +Date: 2022-12-20 22:05:21 +0800 + + Doxygen: Update .gitignore for generating docs for in source build. + + In source builds are not recommended, but we should still ignore + the generated artifacts. + + .gitignore | 2 ++ + 1 file changed, 2 insertions(+) + +commit d3e6fe44196bf9478ad193522e2b48febf2eca6b +Author: Jia Tan +Date: 2022-12-20 20:46:44 +0800 + + liblzma: Fix lzma_microlzma_encoder() return value. + + Using return_if_error on lzma_lzma_lclppb_encode was improper because + return_if_error is expecting an lzma_ret value, but + lzma_lzma_lclppb_encode returns a boolean. This could result in + lzma_microlzma_encoder, which would be misleading for applications. + + src/liblzma/common/microlzma_encoder.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +commit b55a27b46f52524a4a8d9cdef03e6689cefc1375 +Author: Lasse Collin +Date: 2022-12-16 18:30:02 +0200 + + liblzma: Update authors list in arm64.c. + + src/liblzma/simple/arm64.c | 1 + + 1 file changed, 1 insertion(+) + +commit 2fd28d2b7cec3468324a6f15eff7e73c285b1d7d +Author: Jia Tan +Date: 2022-12-16 20:58:55 +0800 + + CMake: Update .gitignore for CMake artifacts from in source build. 
+ + In source builds are not recommended, but we can make it easier + by ignoring the generated artifacts from CMake. + + .gitignore | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + commit b69da6d4bb6bb11fc0cf066920791990d2b22a06 Author: Lasse Collin Date: 2022-12-13 20:37:17 +0200 Bump version to 5.4.0 and soname to 5.4.0. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 6 +++--- src/liblzma/liblzma_generic.map | 2 +- src/liblzma/liblzma_linux.map | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) commit 20869eb3fb280ff4f271ef527b12b6bf68b05e19 Author: Lasse Collin Date: 2022-12-13 20:29:39 +0200 Update INSTALL: CMake on Windows isn't experimental anymore. Using CMake to build liblzma should work on a few other OSes but building the command line tools is still subtly broken. It is known that shared library versioning may differ between CMake and Libtool builds on some OSes, most notably Darwin. INSTALL | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) commit cbbd84451944e3e8c63acfaa3c923f6d8aff7852 Author: Lasse Collin Date: 2022-12-13 19:47:53 +0200 Add NEWS for 5.4.0. NEWS | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) commit c3e94d37e8d10a3e96019864b6f5d7b578db2c14 Author: Lasse Collin Date: 2022-12-13 17:41:20 +0200 Fix a typo in NEWS. NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 0d2a2e0a545c3da2b3e9500f1e531eb903087245 Author: Lasse Collin Date: 2022-12-13 17:41:03 +0200 Add NEWS for 5.2.10. NEWS | 12 ++++++++++++ 1 file changed, 12 insertions(+) commit 177ece1c8eb007188fb1b04eff09ca2193fbdea6 Author: Lasse Collin Date: 2022-12-13 12:30:45 +0200 Tests: Fix a typo in tests/files/README. tests/files/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 60f45bdbaa6b07558b3f4baac285739b0c6342f5 Author: Lasse Collin Date: 2022-12-13 12:30:09 +0200 Tests: Add two ARM64 test files. 
tests/files/README | 7 +++++++ tests/files/good-1-arm64-lzma2-1.xz | Bin 0 -> 512 bytes tests/files/good-1-arm64-lzma2-2.xz | Bin 0 -> 488 bytes tests/test_files.sh | 5 +++++ 4 files changed, 12 insertions(+) commit f5e419550619c548c7c35d7e367cf00580a56521 Author: Lasse Collin Date: 2022-12-12 22:44:21 +0200 Translations: Update the Catalan translation. po/ca.po | 657 +++++++++++++++++++++++++++++---------------------------------- 1 file changed, 306 insertions(+), 351 deletions(-) commit 0fb9d355da3789b1757040af475b4e6bbc8b8af8 Author: Lasse Collin Date: 2022-12-12 19:18:12 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit e5b6c161c61a37c54dcb76a99bbb83ac4abe02dc Author: Lasse Collin Date: 2022-12-12 19:07:58 +0200 Update AUTHORS. AUTHORS | 12 ++++++++++++ 1 file changed, 12 insertions(+) commit f2d98e691099d82054d5f3071ef6f5e809932e44 Author: Lasse Collin Date: 2022-12-12 15:31:14 +0200 Docs: Omit multi-threaded decompress from TODO. The TODO file is still outdated. TODO | 2 -- 1 file changed, 2 deletions(-) commit b42908c42a4cc091db45a7e5ba0e0ecceaa3f6da Author: Lasse Collin Date: 2022-12-11 21:16:09 +0200 Docs: Update xz-file-format.txt to 1.1.0 for ARM64 filter. doc/xz-file-format.txt | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) commit 854f2f5946b353cb0963fd6dfd54d363adc89b9f Author: Lasse Collin Date: 2022-12-11 21:13:57 +0200 xz: Rename --experimental-arm64 to --arm64. src/xz/args.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 31dbd1e5fb65831915a7bbb531c3f19aea8d57a5 Author: Lasse Collin Date: 2022-12-11 21:13:06 +0200 liblzma: Change LZMA_FILTER_ARM64 to the official Filter ID 0x0A. src/liblzma/api/lzma/bcj.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) commit 01b3549e523edac899ec4925b282ceddd20da116 Author: Lasse Collin Date: 2022-12-08 19:24:22 +0200 xz: Make args_info.files_name a const pointer.
src/xz/args.c | 2 +- src/xz/args.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit bc665b84ea6bf7946394a08122177efe41b26a5f Author: Lasse Collin Date: 2022-12-08 19:18:16 +0200 xz: Don't modify argv[]. The code that parses --memlimit options and --block-list modified the argv[] when parsing the option string from optarg. This was visible in "ps auxf" and such and could be confusing. I didn't understand it back in the day when I wrote that code. Now a copy is allocated when modifiable strings are needed. src/xz/args.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) commit a13064e1c290de7933db72b6dffbd65cfce59c9f Author: Lasse Collin Date: 2022-12-08 18:18:57 +0200 Translations: Update the German man page translations. po4a/de.po | 4570 ++++++++++++++++++------------------------------------------ 1 file changed, 1374 insertions(+), 3196 deletions(-) commit 8bdbe42a8d0d75dff70206b923fc4bce5c69a40a Author: Jia Tan Date: 2022-12-06 23:05:56 +0800 Translations: Update the German translation. po/de.po | 586 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 315 insertions(+), 271 deletions(-) commit 5c304b57c24ef40ff57f864301065f0244c05bde Author: Jia Tan Date: 2022-12-06 23:04:25 +0800 Translations: Update the Turkish translation. po/tr.po | 221 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 108 insertions(+), 113 deletions(-) commit 6d86781fdb937970486500447ebb49b98244235b Author: Jia Tan Date: 2022-12-06 23:02:11 +0800 Translations: Update the Croatian translation. po/hr.po | 228 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 113 insertions(+), 115 deletions(-) commit 7a5b4b8075eb36026b1796f04ffed5830c42396a Author: Jia Tan Date: 2022-12-06 22:56:30 +0800 Translations: Add Romanian translation of man pages. Thanks to Remus-Gabriel Chelu. 
po4a/po4a.conf | 2 +- po4a/ro.po | 3692 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 3693 insertions(+), 1 deletion(-) commit c6977e7400088177556e8771bcb839eb7d90caa3 Author: Jia Tan Date: 2022-12-06 22:52:13 +0800 Translations: Update the Romanian translation. po/ro.po | 294 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 147 insertions(+), 147 deletions(-) commit ac2a747e939c2cbccff7a49c399769af5e02d2ab Author: Lasse Collin Date: 2022-12-08 17:30:09 +0200 liblzma: Check for unexpected NULL pointers in block_header_decode(). The API docs gave an impression that such checks are done but they actually weren't done. In practice it made little difference since the calling code has a bug if these are NULL. Thanks to Jia Tan for the original patch that checked for block->filters == NULL. src/liblzma/common/block_header_decoder.c | 4 ++++ 1 file changed, 4 insertions(+) commit 24790f49ae66938c1c7574315e1c0aba1ed5ed25 Author: Lasse Collin Date: 2022-12-01 20:59:32 +0200 Bump version number for 5.3.5beta. This also sorts the symbol names alphabetically in liblzma_*.map. src/liblzma/api/lzma/version.h | 4 ++-- src/liblzma/liblzma_generic.map | 10 +++++----- src/liblzma/liblzma_linux.map | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) commit 7e53c5bcb3c2c17f47c096c06ff6b1481e6ecafa Author: Lasse Collin Date: 2022-12-01 20:57:26 +0200 Add NEWS for 5.3.5beta. NEWS | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) commit 5865f2aaac326fcbd9f8a7d62defa230e4cb644e Author: Lasse Collin Date: 2022-12-01 20:57:09 +0200 Update THANKS. THANKS | 3 +++ 1 file changed, 3 insertions(+) commit 62b270988ec67314d69976df484d2974c6eacfda Author: Lasse Collin Date: 2022-12-01 20:04:17 +0200 liblzma: Use __has_attribute(__symver__) to fix Clang detection. If someone sets up Clang to define __GNUC__ to 10 or greater then symvers broke. 
__has_attribute is supported by such GCC and Clang versions that don't support __symver__ so this should be much better and simpler way to detect if __symver__ is actually supported. Thanks to Tomasz Gajc for the bug report. src/liblzma/common/common.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) commit f9ca7d45162664ddd9fb70e19335c2426e5d75bb Author: Lasse Collin Date: 2022-12-01 18:51:52 +0200 liblzma: Omit zero-skipping from ARM64 filter. It has some complicated downsides and its usefulness is more limited than I originally thought. So this change is bad for certain very specific situations but a generic solution that works for other filters (and is otherwise better too) is planned anyway. And this way 7-Zip can use the same compatible filter for the .7z format. This is still marked as experimental with a new temporary Filter ID. src/liblzma/api/lzma/bcj.h | 2 +- src/liblzma/simple/arm64.c | 81 +++++++++++++--------------------------------- 2 files changed, 24 insertions(+), 59 deletions(-) commit 5baec3f0a9c85e6abf45c0f652f699b074129a8b Author: Lasse Collin Date: 2022-12-01 18:13:27 +0200 xz: Omit the special notes about ARM64 filter on the man page. src/xz/xz.1 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) commit 0c3627b51862eb0dcdd4fc283d046250571991c6 Author: Lasse Collin Date: 2022-12-01 18:12:03 +0200 liblzma: Don't be over-specific in lzma_str_to_filters API doc. src/liblzma/api/lzma/filter.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) commit 94adf057f27b1970f493dc99cd166407d7255639 Author: Lasse Collin Date: 2022-12-01 17:54:23 +0200 liblzma: Silence unused variable warning when BCJ filters are disabled. Thanks to Jia Tan for the original patch. src/liblzma/common/string_conversion.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) commit c68af4441744e5ffc41a472e1be9c9d53a1d9780 Author: Lasse Collin Date: 2022-12-01 17:38:03 +0200 Translations: Update the Chinese (simplified) translation. 
po/zh_CN.po | 608 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 348 insertions(+), 260 deletions(-) commit 3be6942e5c27d29995d41da52fbe274e4ce4a537 Author: Lasse Collin Date: 2022-11-30 18:55:03 +0200 Add NEWS for 5.2.9. NEWS | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) commit 7c16e312cb2f40b81154c0e5be13a3c6b8da485d Author: Jia Tan Date: 2022-11-30 23:33:08 +0800 xz: Remove message_filters_to_str function prototype from message.h. This was forgotten from 7484744af6cbabe81e92af7d9e061dfd597fff7b. src/xz/message.h | 16 ---------------- 1 file changed, 16 deletions(-) commit 764955e2d4f2a5e8d6d6fec63af694f799e050e7 Author: Lasse Collin Date: 2022-11-30 18:08:34 +0200 Change the bug report address. It forwards to me and Jia Tan. Also update the IRC reference in README as #tukaani was moved to Libera Chat long ago. CMakeLists.txt | 2 +- README | 11 +++++------ configure.ac | 2 +- dos/config.h | 2 +- windows/README-Windows.txt | 2 +- 5 files changed, 9 insertions(+), 10 deletions(-) commit c21983c76031e01da01ad3c6cc716fe4b8a75070 Author: Lasse Collin Date: 2022-11-30 17:50:17 +0200 Build: Add string_conversion.c to CMake, DOS, and VS files. CMakeLists.txt | 1 + dos/Makefile | 1 + windows/vs2013/liblzma.vcxproj | 1 + windows/vs2013/liblzma_dll.vcxproj | 1 + windows/vs2017/liblzma.vcxproj | 1 + windows/vs2017/liblzma_dll.vcxproj | 1 + windows/vs2019/liblzma.vcxproj | 1 + windows/vs2019/liblzma_dll.vcxproj | 1 + 8 files changed, 8 insertions(+) commit 30be0c35d24eb5175459d69dbf7d92e2b087ef82 Author: Lasse Collin Date: 2022-11-30 17:38:32 +0200 Update to HTTPS URLs in AUTHORS. AUTHORS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 0a72b9ca2fe20082da9b7128fe0d908af947a851 Author: Jia Tan Date: 2022-11-30 00:52:06 +0800 liblzma: Improve documentation for string to filter functions. 
src/liblzma/api/lzma/filter.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) commit a6e21fcede3b196160a52dd294d965c508a4bb33 Author: Lasse Collin Date: 2022-11-29 22:27:42 +0200 liblzma: Two fixes to lzma_str_list_filters() API docs. Thanks to Jia Tan. src/liblzma/api/lzma/filter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 7484744af6cbabe81e92af7d9e061dfd597fff7b Author: Lasse Collin Date: 2022-11-28 21:57:47 +0200 xz: Use lzma_str_from_filters(). Two uses: Displaying encoder filter chain when compressing with -vv, and displaying the decoder filter chain in --list -vv. src/xz/list.c | 28 ++++++--- src/xz/message.c | 175 +++---------------------------------------------------- 2 files changed, 28 insertions(+), 175 deletions(-) commit cedeeca2ea6ada5b0411b2ae10d7a859e837f203 Author: Lasse Collin Date: 2022-11-28 21:37:48 +0200 liblzma: Add lzma_str_to_filters, _from_filters, and _list_filters. lzma_str_to_filters() uses static error messages which makes them not very precise. It tells the position in the string where an error occurred though which helps quite a bit if applications take advantage of it. Dynamic error messages can be added later with a new flag if it seems important enough. src/liblzma/api/lzma/filter.h | 258 +++++++ src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/string_conversion.c | 1302 ++++++++++++++++++++++++++++++++ src/liblzma/liblzma_generic.map | 3 + src/liblzma/liblzma_linux.map | 3 + 5 files changed, 1567 insertions(+) commit 072ebf7b1335421193ffa9d4a70d5533786b8995 Author: Lasse Collin Date: 2022-11-28 21:02:19 +0200 liblzma: Make lzma_validate_chain() available outside filter_common.c. 
src/liblzma/common/filter_common.c | 8 ++++---- src/liblzma/common/filter_common.h | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) commit 5f22bd2d37e3bd01a5d701b51750eb51f09c11bf Author: Lasse Collin Date: 2022-11-28 10:51:03 +0200 liblzma: Remove lzma_lz_decoder_uncompressed() as it's now unused. src/liblzma/lz/lz_decoder.c | 14 -------------- src/liblzma/lz/lz_decoder.h | 3 --- 2 files changed, 17 deletions(-) commit cee83206465b95729ab649aa2f57fdbde8dcaf89 Author: Lasse Collin Date: 2022-11-28 10:48:53 +0200 liblzma: Use LZMA1EXT feature in lzma_microlzma_decoder(). Here too this avoids the slightly ugly method to set the uncompressed size. Also moved the setting of dict_size to the struct initializer. src/liblzma/common/microlzma_decoder.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) commit e310e8b6a490dfb468f4ed68feff246d776b323c Author: Lasse Collin Date: 2022-11-28 10:28:20 +0200 liblzma: Use LZMA1EXT feature in lzma_alone_decoder(). This avoids the need to use the slightly ugly method to set the uncompressed size. src/liblzma/common/alone_decoder.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) commit 33b8a24b6646a9dbfd8358405aec466b13078559 Author: Lasse Collin Date: 2022-11-27 23:16:21 +0200 liblzma: Add LZMA_FILTER_LZMA1EXT to support LZMA1 without end marker. Some file formats need support for LZMA1 streams that don't use the end of payload marker (EOPM) alias end of stream (EOS) marker. So far liblzma API has supported decompressing such streams via lzma_alone_decoder() when .lzma header specifies a known uncompressed size. Encoding support hasn't been available in the API. Instead of adding a new LZMA1-only API for this purpose, this commit adds a new filter ID for use with raw encoder and decoder. 
The main benefit of this approach is that then also filter chains are possible, for example, if someone wants to implement support for .7z files that use the x86 BCJ filter with LZMA1 (not BCJ2 as that isn't supported in liblzma). src/liblzma/api/lzma/lzma12.h | 123 ++++++++++++++++++++++++++++++-- src/liblzma/common/filter_common.c | 7 ++ src/liblzma/common/filter_decoder.c | 6 ++ src/liblzma/common/filter_encoder.c | 9 +++ src/liblzma/lzma/lzma2_encoder.c | 2 +- src/liblzma/lzma/lzma_decoder.c | 26 ++++++- src/liblzma/lzma/lzma_encoder.c | 40 +++++++++-- src/liblzma/lzma/lzma_encoder.h | 3 +- src/liblzma/lzma/lzma_encoder_private.h | 3 + 9 files changed, 204 insertions(+), 15 deletions(-) commit 9a304bf1e45b3ddf61aaeaa7c764915b34618ede Author: Lasse Collin Date: 2022-11-27 18:43:07 +0200 liblzma: Avoid unneeded use of void pointer in LZMA decoder. src/liblzma/lzma/lzma_decoder.c | 3 +-- src/liblzma/lzma/lzma_decoder.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) commit 218394958c7683f892275bb40eae880620feebcc Author: Lasse Collin Date: 2022-11-27 18:20:33 +0200 liblzma: Pass the Filter ID to LZ encoder and decoder. This allows using two Filter IDs with the same initialization function and data structures. src/liblzma/common/alone_decoder.c | 1 + src/liblzma/common/alone_encoder.c | 1 + src/liblzma/common/common.h | 7 +++++-- src/liblzma/common/lzip_decoder.c | 1 + src/liblzma/common/microlzma_decoder.c | 1 + src/liblzma/common/microlzma_encoder.c | 1 + src/liblzma/lz/lz_decoder.c | 5 +++-- src/liblzma/lz/lz_decoder.h | 3 ++- src/liblzma/lz/lz_encoder.c | 5 +++-- src/liblzma/lz/lz_encoder.h | 3 ++- src/liblzma/lzma/lzma2_decoder.c | 3 ++- src/liblzma/lzma/lzma2_encoder.c | 3 ++- src/liblzma/lzma/lzma_decoder.c | 2 +- src/liblzma/lzma/lzma_encoder.c | 2 +- 14 files changed, 26 insertions(+), 12 deletions(-) commit 1663c7676b76f4c514031797f3db1896e8100f7f Author: Lasse Collin Date: 2022-11-27 01:03:16 +0200 liblzma: Remove two FIXME comments. 
src/liblzma/common/filter_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 11fe708db783ac36ebeeb85da164e29e8c300910 Author: Lasse Collin Date: 2022-11-26 22:25:30 +0200 xz: Use lzma_filters_free(). src/xz/list.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) commit e782af9110d8499c7ac2929bc871540eefea5ea1 Author: Lasse Collin Date: 2022-11-26 22:21:13 +0200 liblzma: Use lzma_filters_free() in more places. src/liblzma/common/block_header_decoder.c | 20 ++------------------ src/liblzma/common/stream_decoder.c | 4 +--- src/liblzma/common/stream_decoder_mt.c | 22 +++++----------------- 3 files changed, 8 insertions(+), 38 deletions(-) commit 90caaded2dc6db1d6a55b01160d7e87f4a423628 Author: Lasse Collin Date: 2022-11-25 18:04:37 +0200 liblzma: Omit simple coder init functions if they are disabled. src/liblzma/simple/arm.c | 4 ++++ src/liblzma/simple/armthumb.c | 4 ++++ src/liblzma/simple/ia64.c | 4 ++++ src/liblzma/simple/powerpc.c | 4 ++++ src/liblzma/simple/sparc.c | 4 ++++ src/liblzma/simple/x86.c | 4 ++++ 6 files changed, 24 insertions(+) commit 5cd9f0df78cc4f8a7807bf6104adea13034fbb45 Author: Lasse Collin Date: 2022-11-24 23:24:59 +0200 xz: Allow nice_len 2 and 3 even if match finder requires 3 or 4. Now that liblzma accepts these, we avoid the extra check and there's one message less for translators too. src/xz/options.c | 5 ----- 1 file changed, 5 deletions(-) commit 3be88ae071371caa279b44e13f4836fb178fe4ae Author: Lasse Collin Date: 2022-11-24 23:23:55 +0200 liblzma: Allow nice_len 2 and 3 even if match finder requires 3 or 4. That is, if the specified nice_len is smaller than the minimum of the match finder, silently use the match finder's minimum value instead of reporting an error. The old behavior is annoying to users and it complicates xz options handling too. 
src/liblzma/lz/lz_encoder.c | 14 +++++++++----- src/liblzma/lz/lz_encoder.h | 9 +++++++++ src/liblzma/lzma/lzma_encoder.c | 11 ++++++++--- 3 files changed, 26 insertions(+), 8 deletions(-) commit 93439cfafe1768b3b18d67d2356ef7e7559bba59 Author: Lasse Collin Date: 2022-11-24 16:25:10 +0200 liblzma: Add lzma_filters_update() support to the multi-threaded encoder. A tiny downside of this is that now 1-4 tiny allocations are made for every Block because each worker thread needs its own copy of the filter chain. src/liblzma/api/lzma/filter.h | 36 +++++++------ src/liblzma/common/stream_encoder_mt.c | 96 +++++++++++++++++++++++++++++++--- 2 files changed, 109 insertions(+), 23 deletions(-) commit 17ac51e689794eb41cab3e80946fec689caea2d2 Author: Lasse Collin Date: 2022-11-24 14:53:22 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 06824396b2b6c84f3a235cb7c19c2a9701167797 Author: Lasse Collin Date: 2022-11-24 14:52:44 +0200 Build: Don't put GNU/Linux-specific symbol versions into static liblzma. It not only makes no sense to put symbol versions into a static library but it can also cause breakage. By default Libtool #defines PIC if building a shared library and doesn't define it for static libraries. This is documented in the Libtool manual. It can be overridden using --with-pic or --without-pic. configure.ac detects if --with-pic or --without-pic is used and then gives an error if neither --disable-shared nor --disable-static was used at the same time. Thus, in normal situations it works to build both shared and static library at the same time on GNU/Linux, only --with-pic or --without-pic requires that only one type of library is built.
Thanks to John Paul Adrian Glaubitz from Debian for reporting the problem that occurred on ia64: https://www.mail-archive.com/xz-devel@tukaani.org/msg00610.html CMakeLists.txt | 5 +- configure.ac | 143 +++++++++++++++++++++++++++++--------------- src/liblzma/common/common.h | 12 ++++ 3 files changed, 111 insertions(+), 49 deletions(-) commit e1acf7107291f8b3d6d609a7133331ff36d35d14 Author: Lasse Collin Date: 2022-11-24 01:32:16 +0200 liblzma: Refactor to use lzma_filters_free(). lzma_filters_free() sets the options to NULL and ids to LZMA_VLI_UNKNOWN so there is no need to do it by caller; the filter arrays will always be left in a safe state. Also use memcpy() instead of a loop to copy a filter chain when it is known to be safe to copy LZMA_FILTERS_MAX + 1 (even if the elements past the terminator might be uninitialized). src/liblzma/common/stream_encoder.c | 16 ++++------------ src/liblzma/common/stream_encoder_mt.c | 11 ++--------- 2 files changed, 6 insertions(+), 21 deletions(-) commit cb05dbcf8b868441ec805016222f3fd77f1c5caa Author: Lasse Collin Date: 2022-11-24 01:26:37 +0200 liblzma: Fix another invalid free() after memory allocation failure. This time it can happen when lzma_stream_encoder_mt() is used to reinitialize an existing multi-threaded Stream encoder and one of 1-4 tiny allocations in lzma_filters_copy() fail. It's very similar to the previous bug 10430fbf3820dafd4eafd38ec8be161a6978ed2b, happening with an array of lzma_filter structures whose old options are freed but the replacement never arrives due to a memory allocation failure in lzma_filters_copy(). src/liblzma/common/stream_encoder_mt.c | 4 ++++ 1 file changed, 4 insertions(+) commit 75f1a6c26df4ce329da0882786403e3ccf5cd898 Author: Jia Tan Date: 2022-05-05 20:53:42 +0800 liblzma: Add support for LZMA_SYNC_FLUSH in the Block encoder. The documentation mentions that lzma_block_encoder() supports LZMA_SYNC_FLUSH but it was never added to supported_actions[] in the internal structure. 
Because of this, LZMA_SYNC_FLUSH could not be used with the Block encoder unless it was the next coder after something like stream_encoder() or stream_encoder_mt(). src/liblzma/common/block_encoder.c | 1 + 1 file changed, 1 insertion(+) commit d0901645170b638c517f5c50866b6ef48f491c65 Author: Lasse Collin Date: 2022-11-24 01:02:50 +0200 liblzma: Add new API function lzma_filters_free(). This is small but convenient and should have been added a long time ago. src/liblzma/api/lzma/filter.h | 21 +++++++++++++++++++++ src/liblzma/common/filter_common.c | 26 ++++++++++++++++++++++++++ src/liblzma/liblzma_generic.map | 1 + src/liblzma/liblzma_linux.map | 1 + 4 files changed, 49 insertions(+) commit ae1f8a723dcde2f2c5cf444bcbb5fc5026b3c3c5 Author: Lasse Collin Date: 2022-11-24 00:02:31 +0200 CMake: Don't use symbol versioning with static library. CMakeLists.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) commit 48c1b99dc537a27e1ca929d8837e778e5ba32191 Author: Lasse Collin Date: 2022-11-23 21:55:22 +0200 liblzma: Add lzma_attr_warn_unused_result to lzma_filters_copy(). src/liblzma/api/lzma/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 10430fbf3820dafd4eafd38ec8be161a6978ed2b Author: Lasse Collin Date: 2022-11-23 21:26:21 +0200 liblzma: Fix invalid free() after memory allocation failure. The bug was in the single-threaded .xz Stream encoder in the code that is used for both re-initialization and for lzma_filters_update(). To trigger it, an application had to either re-initialize an existing encoder instance with lzma_stream_encoder() or use lzma_filters_update(), and then one of the 1-4 tiny allocations in lzma_filters_copy() (called from stream_encoder_update()) must fail. An error was correctly reported but the encoder state was corrupted. This is related to the recent fix in f8ee61e74eb40600445fdb601c374d582e1e9c8a which is good but it wasn't enough to fix the main problem in stream_encoder.c. 
src/liblzma/common/stream_encoder.c | 39 +++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) commit cafd6dc397ca8b5b5f7775e8d6876b8fe70f8e70 Author: Lasse Collin Date: 2022-11-22 16:37:15 +0200 liblzma: Fix language in a comment. src/liblzma/common/stream_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c392bf8ccba857baaf50399c4b460119befacd54 Author: Lasse Collin Date: 2022-11-22 11:20:17 +0200 liblzma: Fix infinite loop in LZMA encoder init with dict_size >= 2 GiB. The encoder doesn't support dictionary sizes larger than 1536 MiB. This is validated, for example, when calculating the memory usage via lzma_raw_encoder_memusage(). It is also enforced by the LZ part of the encoder initialization. However, LZMA encoder with LZMA_MODE_NORMAL did an unsafe calculation with dict_size before such validation and that results in an infinite loop if dict_size was 2 << 30 or greater. src/liblzma/lzma/lzma_encoder.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) commit f50534c973a591ccf65485adfc827a8a7126ca6c Author: Lasse Collin Date: 2022-11-21 13:02:33 +0200 liblzma: Fix two Doxygen commands in the API headers. These were caught by clang -Wdocumentation. src/liblzma/api/lzma/hardware.h | 2 +- src/liblzma/api/lzma/index_hash.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 649d4872ed2f55196114a061d45b416fc4353569 Author: Lasse Collin Date: 2022-11-19 19:09:55 +0200 xz: Refactor duplicate code from hardware_memlimit_mtenc_get(). src/xz/hardware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit d327743bb547a53364e5951a16e5f1663fe4b9ff Author: Lasse Collin Date: 2022-11-19 19:06:13 +0200 xz: Add support --threads=+N so that -T+1 gives threaded mode. 
src/xz/args.c | 18 +++++++++++++++--- src/xz/hardware.c | 17 +++++++++++++++-- src/xz/hardware.h | 1 + src/xz/xz.1 | 21 ++++++++++++++++++++- 4 files changed, 51 insertions(+), 6 deletions(-) commit a11a2b8b5e830ba682c1d81aaa7078842b296995 Author: Jia Tan Date: 2022-11-19 23:18:04 +0800 CMake: Adds test_memlimit to CMake tests CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) commit 2af8d9e9b3f44f62d19e7c39297ec63af2e8c64f Author: Lasse Collin Date: 2022-11-15 19:10:21 +0200 Translations: Update the Korean translation. po/ko.po | 652 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 371 insertions(+), 281 deletions(-) commit 16ac05677292f7e21a4feaddcfb2ab062ea5f385 Author: Lasse Collin Date: 2022-11-15 19:09:28 +0200 Translations: Update the Turkish translation. po/tr.po | 568 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 310 insertions(+), 258 deletions(-) commit b9a67d9a5fa207062d4aa8a01639234609315d31 Author: Lasse Collin Date: 2022-11-15 10:58:39 +0200 Bump version number for 5.3.4alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma_generic.map | 2 +- src/liblzma/liblzma_linux.map | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) commit 5b999ba289b3280457b7386b9ac65dbbdf1575a5 Author: Lasse Collin Date: 2022-11-15 10:54:40 +0200 Add NEWS for 5.3.4alpha. NEWS | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) commit ce8db9e37da4f6c87691c5066f51f91f2411c44a Author: Lasse Collin Date: 2022-11-15 10:54:08 +0200 Add NEWS for 5.2.8. NEWS | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) commit b56bc8251d2736224af6bdaaae734ceb8926a879 Author: Lasse Collin Date: 2022-11-14 23:19:57 +0200 Revert "liblzma: Simple/BCJ filters: Allow disabling generic BCJ options." This reverts commit 177bdc922cb17bd0fd831ab8139dfae912a5c2b8 and also does equivalent change to arm64.c. 
Now that ARM64 filter will use lzma_options_bcj, this change is not needed anymore. src/liblzma/simple/arm.c | 2 +- src/liblzma/simple/arm64.c | 2 +- src/liblzma/simple/armthumb.c | 2 +- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 2 +- src/liblzma/simple/simple_coder.c | 4 ++-- src/liblzma/simple/simple_private.h | 2 +- src/liblzma/simple/sparc.c | 2 +- src/liblzma/simple/x86.c | 3 +-- 9 files changed, 10 insertions(+), 11 deletions(-) commit 8370ec8edf9ddf8d1d9fef03d8d1027503ec4c35 Author: Lasse Collin Date: 2022-11-14 23:14:41 +0200 Replace the experimental ARM64 filter with a new experimental version. This is incompatible with the previous version. This has space/tab fixes in filter_*.c and bcj.h too. src/liblzma/api/lzma/bcj.h | 41 +----- src/liblzma/common/filter_common.c | 14 +- src/liblzma/common/filter_decoder.c | 12 +- src/liblzma/common/filter_encoder.c | 17 +-- src/liblzma/simple/arm64.c | 283 ++++++++++++++---------------------- src/liblzma/simple/simple_decoder.h | 4 - src/liblzma/simple/simple_encoder.h | 2 - src/xz/args.c | 2 +- src/xz/message.c | 13 +- src/xz/options.c | 39 ----- src/xz/options.h | 7 - 11 files changed, 147 insertions(+), 287 deletions(-) commit f644473a211394447824ea00518d0a214ff3f7f2 Author: Lasse Collin Date: 2022-11-14 21:34:57 +0200 liblzma: Add fast CRC64 for 32/64-bit x86 using SSSE3 + SSE4.1 + CLMUL. It also works on E2K as it supports these intrinsics. On x86-64 runtime detection is used so the code keeps working on older processors too. A CLMUL-only build can be done by using -msse4.1 -mpclmul in CFLAGS and this will reduce the library size since the generic implementation and its 8 KiB lookup table will be omitted. On 32-bit x86 this isn't used by default for now because by default on 32-bit x86 the separate assembly file crc64_x86.S is used. If --disable-assembler is used then this new CLMUL code is used the same way as on 64-bit x86. 
However, a CLMUL-only build (-msse4.1 -mpclmul) won't omit the 8 KiB lookup table on 32-bit x86 due to a currently-missing check for disabled assembler usage. The configure.ac check should be such that the code won't be built if something in the toolchain doesn't support it but --disable-clmul-crc option can be used to unconditionally disable this feature. CLMUL speeds up decompression of files that have compressed very well (assuming CRC64 is used as a check type). It is known that the CLMUL code is significantly slower than the generic code for tiny inputs (especially 1-8 bytes but up to 16 bytes). If that is a real-world problem then there is already a commented-out variant that uses the generic version for small inputs. Thanks to Ilya Kurdyukov for the original patch which was derived from a white paper from Intel [1] (published in 2009) and public domain code from [2] (released in 2016). [1] https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf [2] https://github.com/rawrunprotected/crc CMakeLists.txt | 26 ++- INSTALL | 12 ++ configure.ac | 59 +++++- src/liblzma/check/crc64_fast.c | 449 +++++++++++++++++++++++++++++++++++++++- src/liblzma/check/crc64_table.c | 21 +- 5 files changed, 554 insertions(+), 13 deletions(-) commit 3b466bc79672bb2b06d1245a500588e6026e0ba0 Author: Lasse Collin Date: 2022-11-14 20:14:34 +0200 Translations: Update the Swedish translation one more time. po/sv.po | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit e963379a8622ebdff6ce78e76b803bcd1e1d16d6 Author: Lasse Collin Date: 2022-11-14 19:34:15 +0200 Translations: Update the Swedish translation again. po/sv.po | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) commit a4bc689a823a2254f29ac9d233170add5121b307 Author: Lasse Collin Date: 2022-11-14 19:07:45 +0200 Translations: Update the Swedish translation.
po/sv.po | 671 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 382 insertions(+), 289 deletions(-) commit bbf2073d824ab4ba33bed4b77f467435abd333a5 Author: Lasse Collin Date: 2022-11-14 18:58:09 +0200 Translations: Update the Ukrainian translation. po/uk.po | 618 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 354 insertions(+), 264 deletions(-) commit ac10b1b3622e70881595586edfb8a3ebdcd76bb6 Author: Lasse Collin Date: 2022-11-14 17:58:07 +0200 Build: Omit x86_64 from --enable-assembler. It didn't do anything. There are only 32-bit x86 assembly files and it feels likely that new files won't be added as intrinsics in C are more portable across toolchains and OSes. configure.ac | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) commit eb0f1450ad9f23dac03050d9c8375980240aee21 Author: Lasse Collin Date: 2022-11-14 16:00:52 +0200 liblzma: Use __attribute__((__constructor__)) if available. This uses it for CRC table initializations when using --disable-small. It avoids mythread_once() overhead. It also means that then --disable-small --disable-threads is thread-safe if this attribute is supported. CMakeLists.txt | 15 +++++++++++++++ INSTALL | 4 +++- configure.ac | 31 ++++++++++++++++++++++++++++--- src/liblzma/check/crc32_small.c | 7 +++++++ src/liblzma/check/crc64_small.c | 5 +++++ src/liblzma/lz/lz_encoder.c | 2 +- 6 files changed, 59 insertions(+), 5 deletions(-) commit 6553f49b11dafad35c73b05f12e14865ea1fd8a1 Author: Lasse Collin Date: 2022-11-12 21:19:52 +0200 Translations: Update the Romanian translation. po/ro.po | 651 +++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 380 insertions(+), 271 deletions(-) commit db97e69e12393becc29f8febd53133d0d36989bd Author: Lasse Collin Date: 2022-11-12 21:17:45 +0200 Translations: Update the Hungarian translation. 
po/hu.po | 625 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 357 insertions(+), 268 deletions(-) commit 2bbb9c0f3829a8b121b36998d273a6c6f92000f4 Author: Lasse Collin Date: 2022-11-11 17:58:57 +0200 Translations: Update the Finnish translation. po/fi.po | 610 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 348 insertions(+), 262 deletions(-) commit 3c8cbb8137b6f8ed9416c1209d73cdbcb015251f Author: Lasse Collin Date: 2022-11-11 17:58:18 +0200 Translations: Update the Croatian translation. po/hr.po | 680 +++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 381 insertions(+), 299 deletions(-) commit 26c3359eac0988d6f3986735cd1363bec1678e8e Author: Lasse Collin Date: 2022-11-11 17:57:18 +0200 Translations: Update the Polish translation. po/pl.po | 569 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 309 insertions(+), 260 deletions(-) commit 577e467b137c735afb8de6ae71ac7a73c2960cc4 Author: Lasse Collin Date: 2022-11-11 17:56:44 +0200 Translations: Update the Spanish translation. po/es.po | 598 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 344 insertions(+), 254 deletions(-) commit f9b4ff6e9a0f1678650775582d3e4fe782abce97 Author: Lasse Collin Date: 2022-11-11 17:16:03 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit a39961ef211e1bf030b17edeea3cff29fe263b67 Author: Lasse Collin Date: 2022-11-11 17:15:25 +0200 liblzma: Fix building with Intel ICC (the classic compiler). It claims __GNUC__ >= 10 but doesn't support __symver__ attribute. Thanks to Stephen Sachs. src/liblzma/common/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c715f683dcb1a817d565da292cddfbceda643e12 Author: Lasse Collin Date: 2022-11-11 14:35:58 +0200 liblzma: Fix incorrect #ifdef for x86 SSE2 support. __SSE2__ is the correct macro for SSE2 support with GCC, Clang, and ICC. 
__SSE2_MATH__ means doing floating point math with SSE2 instead of 387. Often the latter macro is defined if the first one is but it was still a bug. src/liblzma/common/memcmplen.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) commit bd334ae56afe7f642ad4d0f1ac19e74e82daa1ce Author: Lasse Collin Date: 2022-11-11 13:27:06 +0200 Add NEWS for 5.2.7 (forgotten cherry-pick from v5.2). NEWS | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) commit 3c7860cf49de6f81046b3a4034a89f3a4803a576 Author: Lasse Collin Date: 2022-11-11 13:16:21 +0200 xzdiff: Add support for .lz files. The other scripts don't need changes for .lz support because in those scripts it is enough that xz supports .lz. src/scripts/xzdiff.in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit d76c752a6d77052e5ad57ade555082585f7ac5d8 Author: Lasse Collin Date: 2022-11-11 12:23:58 +0200 Scripts: Ignore warnings from xz. In practice this means making the scripts work when the input files have an unsupported check type which isn't a problem in practice unless support for some check types has been disabled at build time. src/scripts/xzdiff.in | 5 +++-- src/scripts/xzgrep.in | 2 +- src/scripts/xzless.in | 4 ++-- src/scripts/xzmore.in | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) commit 6552535afd1fe29d726ab6e68cf14ce3624fd48c Author: Lasse Collin Date: 2022-11-10 12:34:43 +0200 Translations: Rename poa4/fr_FR.po to po4a/fr.po. That's how it is preferred at the Translation Project. On my system /usr/share/man/fr_FR doesn't contain any other man pages than XZ Utils while /usr/share/man/fr has quite a few, so this will fix that too. Thanks to Benno Schulenberg from the Translation Project. 
po4a/{fr_FR.po => fr.po} | 0 po4a/po4a.conf | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) commit 0918159ce4c75bfb60aff0193b559f8a9f41d25a Author: Lasse Collin Date: 2022-11-09 18:48:50 +0200 xz: Update the man page about BCJ filters, including upcoming --arm64. The --arm64 isn't actually implemented yet in the form described in this commit. Thanks to Jia Tan. src/xz/xz.1 | 66 +++++++++++++++++++++++++++---------------------------------- 1 file changed, 29 insertions(+), 37 deletions(-) commit ba2ae3596f6be1587495f33b367488f6e00e56f1 Author: Lasse Collin Date: 2022-11-09 18:14:14 +0200 xz: Add --arm64 to --long-help and omit endianness from ARM(-Thumb). Modern 32-bit ARM in big endian mode use little endian for instruction encoding still, so the filters work on such executables too. It's likely less confusing for users this way. The --arm64 option hasn't been implemented yet (there is --experimental-arm64 but it's different). The --arm64 option is added now anyway because this is the likely result and the strings need to be ready for translators. Thanks to Jia Tan. src/xz/message.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit 802d57d9215d9c81dbee86edb43c9e93a7f7ec55 Author: Lasse Collin Date: 2022-11-09 15:12:13 +0200 Windows: Update the VS project files for ARM64 and .lz support. windows/vs2013/config.h | 9 +++++++++ windows/vs2013/liblzma.vcxproj | 5 ++++- windows/vs2013/liblzma_dll.vcxproj | 5 ++++- windows/vs2017/config.h | 9 +++++++++ windows/vs2017/liblzma.vcxproj | 3 +++ windows/vs2017/liblzma_dll.vcxproj | 3 +++ windows/vs2019/config.h | 9 +++++++++ windows/vs2019/liblzma.vcxproj | 5 ++++- windows/vs2019/liblzma_dll.vcxproj | 5 ++++- 9 files changed, 49 insertions(+), 4 deletions(-) commit 5846aeda05972bc803c6094821ae836229ebe691 Author: Lasse Collin Date: 2022-11-09 14:57:48 +0200 DOS: Update Makefile and config.h to include ARM64 and .lz support. 
dos/Makefile | 2 ++ dos/config.h | 9 +++++++++ 2 files changed, 11 insertions(+) commit 781da8d6c44de6aa278c916375250668a0b107f2 Author: Lasse Collin Date: 2022-11-09 14:45:05 +0200 CMake: Add lzip decoder files and #define to the build. CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) commit df8ad4af65a9c4846b108550d0083770a69dee64 Author: Lasse Collin Date: 2022-11-09 14:41:56 +0200 Docs: Update INSTALL and also add new prohibited options to PACKAGERS. INSTALL | 49 +++++++++++++++++++++++++++++++++++++++++-------- PACKAGERS | 2 ++ 2 files changed, 43 insertions(+), 8 deletions(-) commit c8ef089c149afaab413c3a51be827dd1d11afe0e Author: Lasse Collin Date: 2022-10-20 17:39:06 +0300 Tests: Test the .lz files in test_files.sh. tests/test_files.sh | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) commit c8f70ebb4628ceb6cb29cc9195d9deadf69d2bd7 Author: Lasse Collin Date: 2022-10-20 15:35:59 +0300 Tests: Add .lz (lzip) test files. tests/files/README | 109 +++++++++++++++++++++++++++++---- tests/files/bad-1-v0-uncomp-size.lz | Bin 0 -> 42 bytes tests/files/bad-1-v1-crc32.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-dict-1.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-dict-2.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-magic-1.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-magic-2.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-member-size.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-trailing-magic.lz | Bin 0 -> 54 bytes tests/files/bad-1-v1-uncomp-size.lz | Bin 0 -> 50 bytes tests/files/good-1-v0-trailing-1.lz | Bin 0 -> 59 bytes tests/files/good-1-v0.lz | Bin 0 -> 42 bytes tests/files/good-1-v1-trailing-1.lz | Bin 0 -> 67 bytes tests/files/good-1-v1-trailing-2.lz | Bin 0 -> 70 bytes tests/files/good-1-v1.lz | Bin 0 -> 50 bytes tests/files/good-2-v0-v1.lz | Bin 0 -> 78 bytes tests/files/good-2-v1-v0.lz | Bin 0 -> 78 bytes tests/files/good-2-v1-v1.lz | Bin 0 -> 86 bytes tests/files/unsupported-1-v234.lz | Bin 0 -> 50 bytes 19 files changed, 98 insertions(+), 11 
deletions(-) commit 731db13e6fa3ad3e3fc786c0ccf6eac4cce6865f Author: Lasse Collin Date: 2022-10-19 22:32:51 +0300 xz: Remove the commented-out FORMAT_GZIP, gzip, .gz, and .tgz. src/xz/args.c | 2 -- src/xz/coder.h | 1 - src/xz/suffix.c | 9 --------- 3 files changed, 12 deletions(-) commit 3176f992c55b8d788c4633809aaf9447376a5a12 Author: Lasse Collin Date: 2022-10-08 21:28:15 +0300 xz: Add .lz (lzip) decompression support. If configured with --disable-lzip-decoder then --long-help will still list `lzip' in --format but I left it like that since due to translations it would be messy to have two help strings. Features are disabled only in special situations so wrong help in such a situation shouldn't matter much. Thanks to Michał Górny for the original patch. src/xz/args.c | 9 ++++++++ src/xz/coder.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/xz/coder.h | 3 +++ src/xz/message.c | 2 +- src/xz/suffix.c | 26 ++++++++++++++++++---- src/xz/xz.1 | 46 +++++++++++++++++++++++++++++++++----- 6 files changed, 141 insertions(+), 13 deletions(-) commit 034086e1ae1459210837a24e04878435c86dc41b Author: Lasse Collin Date: 2022-10-08 00:29:20 +0300 liblzma: Add .lz support to lzma_auto_decoder(). Thanks to Michał Górny for the original patch. src/liblzma/api/lzma/container.h | 10 ++++++---- src/liblzma/common/Makefile.inc | 3 ++- src/liblzma/common/auto_decoder.c | 23 +++++++++++++++++------ src/liblzma/common/lzip_decoder.h | 22 ++++++++++++++++++++++ 4 files changed, 47 insertions(+), 11 deletions(-) commit 0538db038f3cdc352007dacb42454aa1806b8e40 Author: Lasse Collin Date: 2022-10-06 15:50:20 +0300 liblzma: Add .lz (lzip) decompression support (format versions 0 and 1). Support for format version 0 was removed from lzip 1.18 for some reason. .lz format version 0 files are rare (and old) but some source packages were released in this format, and some people might have personal files in this format too. 
It's very little extra code to support it alongside format version 1 so this commit adds support for both. The Sync Flush marker extension to the original .lz format version 1 isn't supported. It would require changes to the LZMA decoder itself. Such files are very rare anyway. See the API doc for lzma_lzip_decoder() for more details about the .lz format support. Thanks to Michał Górny for the original patch. configure.ac | 21 ++ src/liblzma/api/lzma/container.h | 62 +++++- src/liblzma/common/Makefile.inc | 5 + src/liblzma/common/lzip_decoder.c | 413 ++++++++++++++++++++++++++++++++++++++ src/liblzma/liblzma_generic.map | 1 + src/liblzma/liblzma_linux.map | 1 + 6 files changed, 501 insertions(+), 2 deletions(-) commit 633d48a075b9ce4b9c08a7a56a7eb4cabc18100c Author: Lasse Collin Date: 2022-11-09 14:17:23 +0200 liblzma: Add the missing Makefile.inc change for --disable-microlzma. This was forgotten from commit 59c4d6e1390f6f4176f43ac1dad1f7ac03c449b8. src/liblzma/common/Makefile.inc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) commit 724285dadbdc88765c8fb83eab9816575a260966 Author: Lasse Collin Date: 2022-11-09 14:10:52 +0200 xz: Add comments about stdin and src_st.st_size. "xz -v < regular_file > out.xz" doesn't display the percentage and estimated remaining time because it doesn't even try to check the input file size when input is read from stdin. This could be improved but for now there's just a comment to remind about it. src/xz/coder.c | 9 +++++++++ src/xz/file_io.c | 4 ++++ 2 files changed, 13 insertions(+) commit f723eec68b0e44234910f669a29119de33018967 Author: Lasse Collin Date: 2022-11-09 12:48:22 +0200 xz: Fix displaying of file sizes in progress indicator in passthru mode. It worked for one input file since the counters are zero when xz starts but they weren't reset when starting a new file in passthru mode.
For example, if files A, B, and C are one byte each, then "xz -dcvf A B C" would show file sizes as 1, 2, and 3 bytes instead of 1, 1, and 1 byte. src/xz/coder.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 69265d0f223ddf1d66f799b8b047df22923e376f Author: Lasse Collin Date: 2022-11-09 11:27:20 +0200 xz: Add a comment why --to-stdout is not in --help. It is on the man page still. src/xz/message.c | 3 +++ 1 file changed, 3 insertions(+) commit fe6b8852a3c6a0eb5a3c33512e0a69af257d3bc7 Author: Lasse Collin Date: 2022-11-08 23:05:37 +0200 xz: Make xz -lvv show that the upcoming --arm64 needs 5.4.0 to decompress. src/xz/list.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) commit fb3f05ac9f2b4b0e3643401960fbeab31997ac7a Author: Lasse Collin Date: 2022-11-08 22:26:54 +0200 Docs: Update faq.txt a little. doc/faq.txt | 66 ++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 23 deletions(-) commit 05331f091ec3b68eccbfb2a9a7a576072768fb4b Author: Lasse Collin Date: 2022-11-08 16:57:17 +0200 Translations: Update Turkish translation. po/tr.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ed3a4822963b4940d84e6f44d47277c394fc046d Author: Lasse Collin Date: 2022-11-08 14:55:32 +0200 Translations: Update Croatian translation. po/hr.po | 190 ++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 96 insertions(+), 94 deletions(-) commit 4746f5ec721316bc4c6fec9905b2902e0360e0af Author: Lasse Collin Date: 2022-11-08 14:13:03 +0200 liblzma: Update API docs about decoder flags. src/liblzma/api/lzma/container.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) commit 8779a9db5d0cec00c9dc9e9965dd2dda04f9d80d Author: Lasse Collin Date: 2022-11-08 14:01:50 +0200 liblzma: Use the return_if_error() macro in alone_decoder.c. 
src/liblzma/common/alone_decoder.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) commit 3f4990b6822961e75cd9b4e2e82b1df63f6f8fcc Author: Lasse Collin Date: 2022-11-08 14:00:58 +0200 liblzma: Fix a comment in auto_decoder.c. src/liblzma/common/auto_decoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 026a5897c72a2041ae08ceec54ce8b1cdeb51334 Author: Lasse Collin Date: 2022-11-08 13:43:19 +0200 xz: Initialize the pledge(2) sandbox at the very beginning of main(). It feels better that the initializations are sandboxed too. They don't do anything that the pledge() call wouldn't allow. src/xz/main.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) commit 49a59f6ca001c3ce9affa2c162b437aad021b4d5 Author: Lasse Collin Date: 2022-11-07 22:51:16 +0200 xz: Extend --robot --info-memory output. Now it includes everything that the human-readable --info-memory shows. src/xz/hardware.c | 24 +++++++++++++++--------- src/xz/xz.1 | 47 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 15 deletions(-) commit 5e2450c75cbac966c62cf2231c824f2cc91ddba8 Author: Lasse Collin Date: 2022-11-07 17:22:04 +0200 liblzma: Include cached memory in reported memusage in threaded decoder. This affects lzma_memusage() and lzma_memlimit_set() when used with the threaded decompressor. Now all allocations are reported by lzma_memusage() (so it's not misleading) and lzma_memlimit_set() cannot lower the limit below that value. The alternative would have been to allow lowering the limit if doing so is possible by freeing the cached memory but since the primary use case of lzma_memlimit_set() is to increase memlimit after LZMA_MEMLIMIT_ERROR this simple approach was selected. The cached memory was always included when enforcing the memory usage limit while decoding. Thanks to Jia Tan. 
src/liblzma/common/stream_decoder_mt.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) commit 1fc6e7dd1fabdb60124d449b99273330ccab3ff1 Author: Jia Tan Date: 2022-11-07 16:24:14 +0200 xz: Avoid a compiler warning in progress_speed() in message.c. This should be smaller too since it avoids the string constants. src/xz/message.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) commit cf118c014683069b5dbe91898acdc40f2f0a1f5d Author: Lasse Collin Date: 2022-10-31 16:26:05 +0200 Build: Clarify comment in configure.ac about SSE2. configure.ac | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit aad3c609ffb72f581a7a2b67be3ad70b2b327840 Author: Lasse Collin Date: 2022-10-31 16:16:37 +0200 Build: Remove obsolete commented-out lines from configure.ac. configure.ac | 4 ---- 1 file changed, 4 deletions(-) commit e53e0e2186c6b8ce866bd19aec52f1c318ed31ba Author: Lasse Collin Date: 2022-10-31 13:31:58 +0200 Windows: Fix mythread_once() macro with Vista threads. Don't call InitOnceComplete() if initialization was already done. So far mythread_once() has been needed only when building with --enable-small. windows/build.bash does this together with --disable-threads so the Vista-specific mythread_once() is never needed by those builds. VS project files or CMake-builds don't support HAVE_SMALL builds at all. src/common/mythread.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 48dde3bab9dc04081acb5aa7cf7c5044b8a49f58 Author: Lasse Collin Date: 2022-10-31 11:54:44 +0200 liblzma: Silence -Wconversion warning from crc64_fast.c. src/liblzma/check/crc64_fast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit a243c617ff249d915ac123de4f536b80322c1fdb Author: Lasse Collin Date: 2022-10-31 11:49:47 +0200 CMake: Sync tuklib_cpucores.cmake with tuklib_cpucores.m4. This was forgotten from commit 2611c4d90535652d3eb7ef4a026a6691276fab43. 
cmake/tuklib_cpucores.cmake | 5 +++++ 1 file changed, 5 insertions(+) commit 05c72de06fcaaedc78f8abba7d5ec568ddcf1e75 Author: Lasse Collin Date: 2022-10-27 15:49:18 +0300 Tests: test_files.sh: Make it not fail if features were disabled at build. It now tries to test as many files as easily possible. The exit status indicates skipping if any of the files were skipped. This way it is easy to notice if something is being skipped when it isn't expected. tests/test_files.sh | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) commit b3459327a51f4b8239d19e6c34b4e0c6bc2d81de Author: Lasse Collin Date: 2022-10-27 15:30:13 +0300 Tests: test_files.sh: Suppress an expected warning from the log. xz (but not xzdec) will normally warn about unsupported check but since we are testing specifically such a file, it's better to silence that warning so that it doesn't look suspicious in test_files.sh.log. The use of -q and -Q in xzdec is just for consistency and doesn't affect the result at least for now. tests/test_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 798c86e4231e0835ab76ccd0810c8ea30833b2ce Author: Lasse Collin Date: 2022-10-27 15:27:50 +0300 Tests: test_files.sh: Print the reason for skipping if xz & xzdec missing. tests/test_files.sh | 1 + 1 file changed, 1 insertion(+) commit c1dd8524e1af07f16b790463899de06a6a5fcc08 Author: Lasse Collin Date: 2022-10-27 01:12:40 +0300 Tests: Keep test_compress_* working when some filters are unavailable. tests/test_compress.sh | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) commit ce30ada91951d0746879ae438da11f1ee8a90aa0 Author: Jia Tan Date: 2022-10-23 21:01:08 +0800 Tests: test_bcj_exact_size skips properly now if PowerPC filter disabled. 
tests/test_bcj_exact_size.c | 3 +++ 1 file changed, 3 insertions(+) commit 89c5cfcacaca7130509fac836e2f30c46b824502 Author: Lasse Collin Date: 2022-10-26 00:05:57 +0300 Tests: Test also unsupported-*.xz. tests/test_files.sh | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) commit a4b214b93ac741edef9c41e55865b0b867ca2587 Author: Lasse Collin Date: 2022-10-25 23:45:03 +0300 Build: Use AC_CONFIG_HEADERS instead of the ancient AC_CONFIG_HEADER. We require Autoconf >= 2.69 and that has AC_CONFIG_HEADERS. There is a warning about AC_PROG_CC_C99 being obsolete but it cannot be removed because it is needed with Autoconf 2.69. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 04f299b64e73f50afc188c2590ebebc6b73ed744 Author: Lasse Collin Date: 2022-10-25 23:31:44 +0300 Build: Update m4/ax_pthread.m4 from Autoconf Archive. m4/ax_pthread.m4 | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) commit 59c4d6e1390f6f4176f43ac1dad1f7ac03c449b8 Author: Lasse Collin Date: 2022-10-25 23:28:34 +0300 Build: Add configure option --disable-microlzma. MicroLZMA was made for EROFS and used by erofs-utils. It might be used by something else in the future but those wanting a smaller build for specific situations can now disable this rarely-needed feature. configure.ac | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) commit 054ccd6d14b2cc6eddc56897af280d3221414150 Author: Lasse Collin Date: 2022-10-25 23:09:11 +0300 xz: Fix --single-stream with an empty .xz Stream. Example: $ xz -dc --single-stream good-0-empty.xz xz: good-0-empty.xz: Internal error (bug) The code that tries to catch some input file issues early didn't anticipate LZMA_STREAM_END, which is possible in that code only when --single-stream is used.
src/xz/coder.c | 9 +++++++++ 1 file changed, 9 insertions(+) commit 563288ea705e83ff5cb292adf794650c263bca1d Author: Lasse Collin Date: 2022-10-25 21:11:58 +0300 xz: Add support for OpenBSD's pledge() sandbox. configure.ac | 12 +++++++++--- src/xz/file_io.c | 11 +++++++++++ src/xz/main.c | 13 +++++++++++++ src/xz/private.h | 2 +- 4 files changed, 34 insertions(+), 4 deletions(-) commit f9913e8ee2ba0b1e4ff4d0aa4c001aae305ed944 Author: Lasse Collin Date: 2022-10-25 19:07:17 +0300 xz: Fix decompressor behavior if input uses an unsupported check type. Now files with unsupported check will make xz display a warning, set the exit status to 2 (unless --no-warn is used), and then decompress the file normally. This is how it was supposed to work since the beginning but this was broken by the commit 231c3c7098f1099a56abb8afece76fc9b8699f05, that is, a little before 5.0.0 was released. The buggy behavior displayed a message, set exit status 1 (error), and xz didn't attempt to decompress the file. This doesn't matter today except for special builds that disable CRC64 or SHA-256 at build time (but such builds should be used in special situations only). The bug matters if new check type is added in the future and an old xz version is used to decompress such a file; however, it's likely that such files would use a new filter too and an old xz wouldn't be able to decompress the file anyway. The first hunk in the commit is the actual fix. The second hunk is a cleanup since LZMA_TELL_ANY_CHECK isn't used in xz. There is a test file for unsupported check type but it wasn't used by test_files.sh, perhaps due to different behavior between xz and the simpler xzdec. src/xz/coder.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) commit aa4fe145b9486adc454f44fd3e09be9add808a0f Author: Lasse Collin Date: 2022-10-25 18:36:19 +0300 xz: Clarify the man page: input file isn't removed if an error occurs.
src/xz/xz.1 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit 8b46ae8cdeddfd7dc01fec92971b8696e9a96c5d Author: Lasse Collin Date: 2022-10-25 18:30:55 +0300 xz: Refactor to remove is_empty_filename(). Long ago it was used in list.c too but nowadays it's needed only in io_open_src() so it's nicer to avoid a separate function. src/xz/file_io.c | 4 +++- src/xz/util.c | 12 ------------ src/xz/util.h | 4 ---- 3 files changed, 3 insertions(+), 17 deletions(-) commit 85624015978b0de294cff3df79006df987c552b1 Author: Lasse Collin Date: 2022-10-25 18:23:54 +0300 xz: If input file cannot be removed, treat it as a warning, not error. Treating it as a warning (message + exit status 2) matches gzip and it seems more logical as at that point the output file has already been successfully closed. When it's a warning it is possible to suppress it with --no-warn. src/xz/file_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit fda9f85f52c546f7ca0313cf89481da4707fecb3 Author: Lasse Collin Date: 2022-10-24 16:25:09 +0300 liblzma: Threaded decoder: Stop the worker threads on errors. It's a waste of CPU time and electricity to leave the unfinished worker threads running when it is known that their output will get ignored. src/liblzma/common/stream_decoder_mt.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) commit 2611c4d90535652d3eb7ef4a026a6691276fab43 Author: Lasse Collin Date: 2022-10-20 20:22:50 +0300 tuklib_cpucores: Use HW_NCPUONLINE on OpenBSD. On OpenBSD the number of cores online is often less than what HW_NCPU would return because OpenBSD disables simultaneous multi-threading (SMT) by default. Thanks to Christian Weisgerber. m4/tuklib_cpucores.m4 | 5 +++++ src/common/tuklib_cpucores.c | 9 +++++++++ 2 files changed, 14 insertions(+) commit 424ac91c7e0419393ff2bde4f62e21fa611c776d Author: Lasse Collin Date: 2022-10-19 19:39:35 +0300 Tests: Skip tests in test_*.sh if encoders and/or decoders are disabled.
This isn't perfect as the scripts can still fail if only certain filters are disabled. This is still an improvement as now "make check" has better behavior when all encoders or decoders are disabled. Grepping ../config.h is simple and fairly clean but it only works if config.h was created. CMake builds don't create config.h but they don't use these test scripts either. Thanks to Sebastian Andrzej Siewior for reporting the problem. Thanks to Jia Tan for the original patch which grepped xz error messages instead of config.h. tests/test_compress.sh | 12 ++++++++++++ tests/test_files.sh | 11 +++++++++++ tests/test_scripts.sh | 11 +++++++++++ 3 files changed, 34 insertions(+) commit ca8bf9d7c5a30be8ba1eeb106fd892f19e83ed09 Author: Lasse Collin Date: 2022-10-19 18:54:34 +0300 Test: Remove the (exit 1) lines. I suspect that I used these in the original version because Autoconf's manual describes that such a trick is needed in some specific situations for portability reasons. None of those situations listed on Autoconf 2.71 manual apply to these test scripts though so this cleans them up. tests/test_compress.sh | 10 ---------- tests/test_files.sh | 9 --------- tests/test_scripts.sh | 6 ------ 3 files changed, 25 deletions(-) commit 82fcb7cfc17ce62f79ebc7ca2374e1daca5e4d5e Author: Lasse Collin Date: 2022-10-19 17:14:57 +0300 Tests: Fix a warning in test_memlimit.c when decoders are disabled. tests/test_memlimit.c | 3 +++ 1 file changed, 3 insertions(+) commit b5f8271b45b9b0e59485ffba3640ca3418835ec4 Author: Lasse Collin Date: 2022-10-19 17:11:46 +0300 Tests: Add test_memlimit to .gitignore. Thanks to Jia Tan. .gitignore | 1 + 1 file changed, 1 insertion(+) commit 6a86e81cab202d0a812a7b2e9efacaf70c58ba38 Author: Jia Tan Date: 2022-10-06 21:53:09 +0300 Tests: Refactor test_stream_flags.c. Converts test_stream_flags to tuktest. Also the test will now compile and skip properly if encoders or decoders are disabled. Thanks to Sebastian Andrzej Siewior. 
tests/test_stream_flags.c | 533 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 416 insertions(+), 117 deletions(-) commit 827ac5b4821491fd3afe0d0e1ddac326253aeb66 Author: Jia Tan Date: 2022-10-06 17:00:38 +0800 Tests: Refactor test_block_header.c. test_block_header now achieves higher test coverage. Also the test will now compile and skip properly if encoders or decoders are disabled. Thanks to Sebastian Andrzej Siewior. tests/test_block_header.c | 486 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 370 insertions(+), 116 deletions(-) commit 84963318952064a93bfc52edd6b0ef70593384ee Author: Jia Tan Date: 2022-10-05 23:54:12 +0800 Tests: Fix compilation issues. test_bcj_exact_size, test_check, test_hardware, and test_index will all now compile and skip properly if encoders or decoders are disabled. Also fixed a small typo (disabed -> disabled). Thanks to Sebastian Andrzej Siewior. tests/test_bcj_exact_size.c | 20 ++++++++++++++------ tests/test_check.c | 8 +++++++- tests/test_hardware.c | 2 +- tests/test_index.c | 6 ++++++ tests/test_memlimit.c | 16 +++++++++++++++- tests/test_vli.c | 13 +++++++++++++ 6 files changed, 56 insertions(+), 9 deletions(-) commit 7dcabeec63d46b436fa5f043c3d1f09d0e15be16 Author: Lasse Collin Date: 2022-10-05 16:20:47 +0300 Tests: Include mythread.h in the tests that use MYTHREAD_ENABLED. tests/test_check.c | 1 + tests/test_hardware.c | 1 + tests/test_memlimit.c | 1 + 3 files changed, 3 insertions(+) commit 14af758a770c7781af18fb66d6d21ee5b1c27f04 Author: Jia Tan Date: 2022-10-05 20:57:16 +0800 liblzma: Fix a compilation issue when encoders are disabled. When encoders were disabled and threading enabled, outqueue.c and outqueue.h were not compiled. The multi threaded decoder required these files, so compilation failed. 
src/liblzma/common/Makefile.inc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 6ca5c354bd4620aa7f81da68870eef1b1f26288f Author: Jia Tan Date: 2022-10-05 16:41:38 +0800 Tests: Fix compilation error when threading support has been disabled. Now tests that require threading are skipped when threading support has been disabled. Thanks to Sebastian Andrzej Siewior. tests/test_check.c | 4 ++++ tests/test_hardware.c | 4 ++++ tests/test_memlimit.c | 4 ++++ 3 files changed, 12 insertions(+) commit fae37ad2affd8fe8871f4ff93d5cab5ec14d5e58 Author: Lasse Collin Date: 2022-10-05 14:26:00 +0300 tuklib_integer: Add 64-bit endianness-converting reads and writes. Also update the comment in liblzma's memcmplen.h. Thanks to Michał Górny for the original patch for the reads. m4/tuklib_integer.m4 | 8 ++++---- src/common/tuklib_integer.h | 46 ++++++++++++++++++++++++++++++++++++++++-- src/liblzma/common/memcmplen.h | 9 +++------ 3 files changed, 51 insertions(+), 12 deletions(-) commit 508a44372c5b0dede8863fd0d358d4a9d8645c95 Author: Lasse Collin Date: 2022-09-30 12:06:13 +0300 liblzma: Add API doc note about the .xz decoder LZMA_MEMLIMIT_ERROR bug. The bug was fixed in 660739f99ab211edec4071de98889fb32ed04e98. src/liblzma/api/lzma/base.h | 11 +++++++++++ 1 file changed, 11 insertions(+) commit 8cc9874a7974cd575aee44f218836f7acdbeb0ed Author: Jia Tan Date: 2022-09-21 16:15:50 +0800 liblzma: Add dest and src NULL checks to lzma_index_cat. The documentation states LZMA_PROG_ERROR can be returned from lzma_index_cat. Previously, lzma_index_cat could not return LZMA_PROG_ERROR. Now, the validation is similar to lzma_index_append, which does a NULL check on the index parameter. src/liblzma/common/index.c | 3 +++ 1 file changed, 3 insertions(+) commit afd5a8bf5374eba82804a999e1ea7af680784086 Author: Jia Tan Date: 2022-09-21 20:29:28 +0800 Tests: Create a test for the lzma_index_cat bug. 
tests/test_index.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) commit 3d5a99ca373a4e86faf671226ca6487febb9eeac Author: Jia Tan Date: 2022-09-21 19:28:53 +0800 liblzma: Fix copying of check type statistics in lzma_index_cat(). The check type of the last Stream in dest was never copied to dest->checks (the code tried to copy it but it was done too late). This meant that the value returned by lzma_index_checks() would only include the check type of the last Stream when multiple lzma_indexes had been concatenated. In xz --list this meant that the summary would only list the check type of the last Stream, so in this sense this was only a visual bug. However, it's possible that some applications use this information for purposes other than merely showing it to the users in an informational message. I'm not aware of such applications though and it's quite possible that such applications don't exist. Regular streamed decompression in xz or any other application doesn't use lzma_index_cat() and so this bug cannot affect them. src/liblzma/common/index.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit a61d32172789735350a941e23baf6b587c49e5d9 Author: Lasse Collin Date: 2022-09-28 12:20:41 +0300 tuklib_physmem: Fix Unicode builds on Windows. Thanks to ArSaCiA Game. src/common/tuklib_physmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5a4d3548ab214fdca364d5c734baf1d1fab47308 Author: Lasse Collin Date: 2022-09-28 11:12:07 +0300 Tests: Add test_memlimit.c to test restarting after LZMA_MEMLIMIT_ERROR. tests/Makefile.am | 2 + tests/test_memlimit.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+) commit 660739f99ab211edec4071de98889fb32ed04e98 Author: Lasse Collin Date: 2022-09-28 11:05:15 +0300 liblzma: Stream decoder: Fix restarting after LZMA_MEMLIMIT_ERROR. 
If lzma_code() returns LZMA_MEMLIMIT_ERROR it is now possible to use lzma_memlimit_set() to increase the limit and continue decoding. This was supposed to work from the beginning but there was a bug. With other decoders (.lzma or threaded .xz) this already worked correctly. src/liblzma/common/stream_decoder.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) commit 7e68fda58c74ad9e5b876cc22fcbe80fc0e4747b Author: Lasse Collin Date: 2022-09-28 11:00:23 +0300 liblzma: Stream decoder: Fix comments. src/liblzma/common/stream_decoder.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) commit f664cb25841fc1c478b819034a224a558e2ac6e7 Author: Lasse Collin Date: 2022-09-20 16:58:22 +0300 liblzma: ARM64: Add comments. src/liblzma/simple/arm64.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) commit b557b4a0eea05470fae8ba5ef4ad5a6dfb36ac41 Author: Lasse Collin Date: 2022-09-20 16:27:50 +0300 liblzma: ARM64: Fix wrong comment in API doc. Thanks to Jia Tan. src/liblzma/api/lzma/bcj.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit d5b0906fa55157f48c200188a3951d80df9cb308 Author: Lasse Collin Date: 2022-09-19 20:24:26 +0300 xz: Add --experimental-arm64[=width=WIDTH]. It will be renamed to --arm64 once it is stable. Man page or --long-help weren't updated yet. src/xz/args.c | 7 +++++++ src/xz/message.c | 7 +++++++ src/xz/options.c | 39 +++++++++++++++++++++++++++++++++++++++ src/xz/options.h | 7 +++++++ 4 files changed, 60 insertions(+) commit ecb966de308c255bb4735a7307ef9901c643a9de Author: Lasse Collin Date: 2022-09-19 19:34:56 +0300 liblzma: Add experimental ARM64 BCJ filter with a temporary Filter ID. That is, the Filter ID will be changed once the design is final. The current version will be removed. So files created with the temporary Filter ID won't be supported in the future.
CMakeLists.txt | 3 + configure.ac | 4 +- src/liblzma/api/lzma/bcj.h | 35 +++++- src/liblzma/common/filter_common.c | 9 ++ src/liblzma/common/filter_decoder.c | 8 ++ src/liblzma/common/filter_encoder.c | 11 ++ src/liblzma/simple/Makefile.inc | 4 + src/liblzma/simple/arm64.c | 227 ++++++++++++++++++++++++++++++++++++ src/liblzma/simple/simple_coder.h | 9 ++ src/liblzma/simple/simple_decoder.h | 4 + src/liblzma/simple/simple_encoder.h | 2 + 11 files changed, 313 insertions(+), 3 deletions(-) commit 177bdc922cb17bd0fd831ab8139dfae912a5c2b8 Author: Lasse Collin Date: 2022-09-17 22:42:18 +0300 liblzma: Simple/BCJ filters: Allow disabling generic BCJ options. This will be needed for the ARM64 BCJ filter as it will use its own options struct. src/liblzma/simple/arm.c | 2 +- src/liblzma/simple/armthumb.c | 2 +- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 2 +- src/liblzma/simple/simple_coder.c | 4 ++-- src/liblzma/simple/simple_private.h | 2 +- src/liblzma/simple/sparc.c | 2 +- src/liblzma/simple/x86.c | 3 ++- 8 files changed, 10 insertions(+), 9 deletions(-) commit c3592d0a55114144686ecf960cb516d6b31c98e9 Author: Lasse Collin Date: 2022-09-16 17:08:53 +0300 Tests: Add a test file for lzma_index_append() integer overflow bug. This test fails before commit 18d7facd3802b55c287581405c4d49c98708c136. test_files.sh now runs xz -l for bad-3-index-uncomp-overflow.xz because only then the previously-buggy code path gets tested. Normal decompression doesn't use lzma_index_append() at all. Instead, lzma_index_hash functions are used and those already did the overflow check. tests/files/README | 10 ++++++++++ tests/files/bad-3-index-uncomp-overflow.xz | Bin 0 -> 132 bytes tests/test_files.sh | 8 ++++++++ 3 files changed, 18 insertions(+) commit 982b29f828079a2a26253a40e975127a40a7d2bd Author: Lasse Collin Date: 2022-09-16 15:10:07 +0300 Translations: Add Turkish translation. 
po/LINGUAS | 1 + po/tr.po | 977 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 978 insertions(+) commit 1fc088d9f6d1697924aaeac8cd1fb9918d1532e2 Author: Lasse Collin Date: 2022-09-16 14:09:07 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 097c7b67ce86ff16a7cef7631b39e5ca4ee3d420 Author: Lasse Collin Date: 2022-09-16 14:07:03 +0300 xzgrep: Fix compatibility with old shells. Running the current xzgrep on Slackware 10.1 with GNU bash 3.00.15: xzgrep: line 231: syntax error near unexpected token `;;' On SCO OpenServer 5.0.7 with Korn Shell 93r: syntax error at line 231 : `;;' unexpected Turns out that some old shells don't like apostrophes (') inside command substitutions. For example, the following fails: x=$(echo foo # asdf'zxcv echo bar) printf '%s\n' "$x" The problem was introduced by commits 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 (2022-03-29), bd7b290f3fe4faeceb7d3497ed9bf2e6ed5e7dc5 (2022-07-18), and a648978b20495b7aa4a8b029c5a810b5ad9d08ff (2022-07-19). 5.2.6 is the only stable release that included this problem. Thanks to Kevin R. Bulgrien for reporting the problem on SCO OpenServer 5.0.7 and for providing the fix. src/scripts/xzgrep.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit f2d084fe3f0d6d71488bfc6987f26542f67bfd99 Author: Lasse Collin Date: 2022-09-09 14:12:30 +0300 Tests: Silence warnings about unused functions from tuktest.h. Warnings about unused tuktest_run_test conveniently tell which test programs haven't been converted to tuktest.h yet but I silenced that warning too for now anyway. It is fine to use __attribute__((__unused__)) even when the function is actually used because the attribute only means that the function might be unused. tests/tuktest.h | 10 ++++++++++ 1 file changed, 10 insertions(+) commit f8ee61e74eb40600445fdb601c374d582e1e9c8a Author: Lasse Collin Date: 2022-09-09 13:51:57 +0300 liblzma: lzma_filters_copy: Keep dest[] unmodified if an error occurs. 
lzma_stream_encoder() and lzma_stream_encoder_mt() always assumed this. Before this patch, failing lzma_filters_copy() could result in free(invalid_pointer) or invalid memory reads in stream_encoder.c or stream_encoder_mt.c. To trigger this, allocating memory for a filter options structure has to fail. These are tiny allocations so in practice they very rarely fail. Certain badness in the filter chain array could also make lzma_filters_copy() fail but both stream_encoder.c and stream_encoder_mt.c validate the filter chain before trying to copy it, so the crash cannot occur this way. src/liblzma/api/lzma/filter.h | 4 +++- src/liblzma/common/filter_common.c | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) commit 18d7facd3802b55c287581405c4d49c98708c136 Author: Jia Tan Date: 2022-09-02 20:18:55 +0800 liblzma: lzma_index_append: Add missing integer overflow check. The documentation in src/liblzma/api/lzma/index.h suggests that both the unpadded (compressed) size and the uncompressed size are checked for overflow, but only the unpadded size was checked. The uncompressed check is done first since that is more likely to occur than the unpadded or index field size overflows. src/liblzma/common/index.c | 4 ++++ 1 file changed, 4 insertions(+) commit 9ac06cb5b85274d18f9f70d82cf2d8c9c1151bd4 Author: Lasse Collin Date: 2022-09-08 15:11:08 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit ba3e4ba2de034ae93a513f9c3a0823b80cdb66dc Author: Jia Tan Date: 2022-09-08 15:07:00 +0300 CMake: Clarify a comment about Windows symlinks without file extension. CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) commit 17485e884ce5c74315f29a8a1507bc706cd5cd1d Author: Lasse Collin Date: 2022-09-08 15:02:41 +0300 CMake: Update for liblzma_*.map files and fix wrong common_w32res.rc dep. The previous commit split liblzma.map into liblzma_linux.map and liblzma_generic.map. This commit updates the CMake build for those. 
common_w32res.rc dependency was listed under Linux/FreeBSD while obviously it belongs to Windows when building a DLL. CMakeLists.txt | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) commit 913ddc5572b9455fa0cf299be2e35c708840e922 Author: Lasse Collin Date: 2022-09-04 23:23:00 +0300 liblzma: Vaccinate against an ill patch from RHEL/CentOS 7. RHEL/CentOS 7 shipped with 5.1.2alpha, including the threaded encoder that is behind #ifdef LZMA_UNSTABLE in the API headers. In 5.1.2alpha these symbols are under XZ_5.1.2alpha in liblzma.map. API/ABI compatibility tracking isn't done between development releases so newer releases didn't have XZ_5.1.2alpha anymore. Later RHEL/CentOS 7 updated xz to 5.2.2 but they wanted to keep the exported symbols compatible with 5.1.2alpha. After checking the ABI changes it turned out that >= 5.2.0 ABI is backward compatible with the threaded encoder functions from 5.1.2alpha (but not vice versa as fixes and extensions to these functions were made between 5.1.2alpha and 5.2.0). In RHEL/CentOS 7, XZ Utils 5.2.2 was patched with xz-5.2.2-compat-libs.patch to modify liblzma.map: - XZ_5.1.2alpha was added with lzma_stream_encoder_mt and lzma_stream_encoder_mt_memusage. This matched XZ Utils 5.1.2alpha. - XZ_5.2 was replaced with XZ_5.2.2. It is clear that this was an error; the intention was to keep using XZ_5.2 (XZ_5.2.2 has never been used in XZ Utils). So XZ_5.2.2 lists all symbols that were listed under XZ_5.2 before the patch. lzma_stream_encoder_mt and _mt_memusage are included too so they are listed both here and under XZ_5.1.2alpha. The patch didn't add any __asm__(".symver ...") lines to the .c files. Thus the resulting liblzma.so exports the threaded encoder functions under XZ_5.1.2alpha only. Listing the two functions also under XZ_5.2.2 in liblzma.map has no effect without matching .symver lines. 
The lack of XZ_5.2 in RHEL/CentOS 7 means that binaries linked against unpatched XZ Utils 5.2.x won't run on RHEL/CentOS 7. This is unfortunate but this alone isn't too bad as the problem is contained within RHEL/CentOS 7 and doesn't affect users of other distributions. It could also be fixed internally in RHEL/CentOS 7. The second problem is more serious: In XZ Utils 5.2.2 the API headers don't have #ifdef LZMA_UNSTABLE for obvious reasons. This is true in RHEL/CentOS 7 version too. Thus now programs using new APIs can be compiled without an extra #define. However, the programs end up depending on symbol version XZ_5.1.2alpha (and possibly also XZ_5.2.2) instead of XZ_5.2 as they would with an unpatched XZ Utils 5.2.2. This means that such binaries won't run on other distributions shipping XZ Utils >= 5.2.0 as they don't provide XZ_5.1.2alpha or XZ_5.2.2; they only provide XZ_5.2 (and XZ_5.0). (This includes RHEL/CentOS 8 as the patch luckily isn't included there anymore with XZ Utils 5.2.4.) Binaries built by RHEL/CentOS 7 users get distributed and then people wonder why they don't run on some other distribution. Seems that people have found out about the patch and been copying it to some build scripts, seemingly curing the symptoms but actually spreading the illness further and outside RHEL/CentOS 7. The ill patch seems to be from late 2016 (RHEL 7.3) and in 2017 it had spread at least to EasyBuild. I heard about the events only recently. :-( This commit splits liblzma.map into two versions: one for GNU/Linux and another for other OSes that can use symbol versioning (FreeBSD, Solaris, maybe others). The Linux-specific file and the matching additions to .c files add full compatibility with binaries that have been built against a RHEL/CentOS-patched liblzma. Builds for OSes other than GNU/Linux won't get the vaccine as they should be immune to the problem (I really hope that no build script uses the RHEL/CentOS 7 patch outside GNU/Linux). 
The RHEL/CentOS compatibility symbols XZ_5.1.2alpha and XZ_5.2.2 are intentionally put *after* XZ_5.2 in liblzma_linux.map. This way if one forgets to #define HAVE_SYMBOL_VERSIONS_LINUX when building, the resulting liblzma.so.5 will have lzma_stream_encoder_mt@@XZ_5.2 since XZ_5.2 {...} is the first one that lists that function. Without HAVE_SYMBOL_VERSIONS_LINUX @XZ_5.1.2alpha and @XZ_5.2.2 will be missing but that's still a minor problem compared to only having lzma_stream_encoder_mt@@XZ_5.1.2alpha! The "local: *;" line was moved to XZ_5.0 so that it doesn't need to be moved around. It doesn't matter where it is put. Having two similar liblzma_*.map files is a bit silly as it is, at least for now, easily possible to generate the generic one from the Linux-specific file. But that adds extra steps and increases the risk of mistakes when supporting more than one build system. So I'd rather maintain two files in parallel and let validate_map.sh check that they are in sync when "make mydist" is run. This adds .symver lines for lzma_stream_encoder_mt@XZ_5.2.2 and lzma_stream_encoder_mt_memusage@XZ_5.2.2 even though these weren't exported by RHEL/CentOS 7 (only @@XZ_5.1.2alpha was for these two). I added these anyway because someone might misunderstand the RHEL/CentOS 7 patch and think that @XZ_5.2.2 (@@XZ_5.2.2) versions were exported too. At a glance one could suggest using __typeof__ to copy the function prototypes when making aliases. However, this doesn't work trivially because __typeof__ won't copy attributes (lzma_nothrow, lzma_pure) and it won't change symbol visibility from hidden to default (done by LZMA_API()). Attributes could be copied with __copy__ attribute but that needs GCC 9 and a fallback method would be needed anyway. This uses __symver__ attribute with GCC >= 10 and __asm__(".symver ...") with everything else. The attribute method is required for LTO (-flto) support with GCC.
Using -flto with GCC older than 10 is now broken on GNU/Linux and will not be fixed (can silently result in a broken liblzma build that has dangerously incorrect symbol versions). LTO builds with Clang seem to work with the traditional __asm__(".symver ...") method. Thanks to Boud Roukema for reporting the problem and discussing the details and testing the fix. configure.ac | 23 +++- src/liblzma/Makefile.am | 10 +- src/liblzma/common/block_buffer_encoder.c | 18 ++++ src/liblzma/common/common.c | 14 +++ src/liblzma/common/common.h | 28 +++++ src/liblzma/common/hardware_cputhreads.c | 12 +++ src/liblzma/common/stream_encoder_mt.c | 42 ++++++++ src/liblzma/{liblzma.map => liblzma_generic.map} | 6 +- src/liblzma/liblzma_linux.map | 131 +++++++++++++++++++++++ src/liblzma/validate_map.sh | 113 +++++++++++++++++-- 10 files changed, 382 insertions(+), 15 deletions(-) commit 80a1a8bb838842a2be343bd88ad1462c21c5e2c9 Author: Lasse Collin Date: 2022-08-31 16:42:04 +0300 CMake: Add xz symlinks. These are a minor thing especially since the xz build has some real problems still like lack of large file support on 32-bit systems but I'll commit this since the code exists. Thanks to Jia Tan. CMakeLists.txt | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) commit a4193bb6d85d7765b1b87faeab3e50106d3ab2e9 Author: Lasse Collin Date: 2022-08-31 16:29:38 +0300 CMake: Put xz man page install under if(UNIX) like is for xzdec. Thanks to Jia Tan. CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) commit c1555b1a221a1427d4d650647531016d754bc4da Author: Lasse Collin Date: 2022-08-22 18:16:40 +0300 Bump version number for 5.3.3alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma.map | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 44fedddc11c3f6ec2f7fe35a3e38f15ca93f90eb Author: Lasse Collin Date: 2022-08-22 18:13:56 +0300 Add NEWS for 5.3.3alpha. 
NEWS | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) commit a93e235d7ca764cc19f8f9f9063b40ff361c3cfa Author: Lasse Collin Date: 2022-07-12 17:59:41 +0300 Translations: Add Portuguese translation. Jia Tan made white-space changes and also changed "Language: pt_BR\n" to pt. The translator wasn't reached so I'm hoping these changes are OK and will commit it without translator's approval. Thanks to Pedro Albuquerque and Jia Tan. po/LINGUAS | 1 + po/pt.po | 1001 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1002 insertions(+) commit e7cf5a946f25e40d77f45e41f0dee7d42a04e9ae Author: Lasse Collin Date: 2022-07-10 21:16:40 +0300 Translations: Add Serbian translation. Quite a few white-space changes were made by Jia Tan to make this look good. Contacting the translator didn't succeed so I'm committing this without getting translator's approval. Thanks to Мирослав Николић (Miroslav Nikolic) and Jia Tan. po/LINGUAS | 1 + po/sr.po | 987 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 988 insertions(+) commit cc8617ab53b1f2a4da212fa76c92fe538269c5de Author: Lasse Collin Date: 2022-07-04 23:51:36 +0300 Translations: Add Swedish translation. Thanks to Sebastian Rasmussen and Jia Tan. po/LINGUAS | 1 + po/sv.po | 983 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 984 insertions(+) commit c613598c358b640682d0ca2aed38fa7df763e8c7 Author: Lasse Collin Date: 2022-07-04 23:40:27 +0300 Translations: Add Esperanto translation. Thanks to Keith Bowes and Jia Tan. po/LINGUAS | 1 + po/eo.po | 984 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 985 insertions(+) commit 659a587d678f21e98e91d2751c31d4ce050c081a Author: Lasse Collin Date: 2022-07-01 00:22:33 +0300 Translations: Add Catalan translation. Thanks to Jordi Mas and Jia Tan. 
po/LINGUAS | 1 + po/ca.po | 1076 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1077 insertions(+) commit 739fd8c9bdf1d85f57d56642aad87148d4779530 Author: Lasse Collin Date: 2022-06-30 17:47:08 +0300 Translations: Add Ukrainian translation. Thanks to Yuri Chornoivan and Jia Tan. po/LINGUAS | 1 + po/uk.po | 996 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 997 insertions(+) commit 73280550b111930c62a667e56add8fd574f80bc8 Author: Lasse Collin Date: 2022-06-30 17:45:26 +0300 Translators: Add Romanian translation. Thanks to Remus-Gabriel Chelu and Jia Tan. po/LINGUAS | 1 + po/ro.po | 1016 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1017 insertions(+) commit 2465f5b825152714b2c357d96c2422c31109d320 Author: Lasse Collin Date: 2022-06-29 18:33:32 +0300 Translations: Update Brazilian Portuguese translation. One msgstr was changed. The diff is long due to changes in the source code line numbers in the comments. Thanks to Rafael Fontenelle. po/pt_BR.po | 186 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 92 insertions(+), 94 deletions(-) commit 434e1ffd3e62719d509da10b646216f5ef53fe4d Author: Lasse Collin Date: 2022-06-29 18:04:44 +0300 Translations: Add Croatian translation. Thanks to Božidar Putanec and Jia Tan. po/LINGUAS | 1 + po/hr.po | 987 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 988 insertions(+) commit 0732d0f7065c9bd48bfe4f5983144ae970c4a499 Author: Lasse Collin Date: 2022-06-29 17:58:48 +0300 Translations: Add Spanish translation. Thanks to Cristian Othón Martínez Vera and Jia Tan. po/LINGUAS | 1 + po/es.po | 984 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 985 insertions(+) commit 9899b0f82bc130998d1f1f618a6ab805b73f2696 Author: Lasse Collin Date: 2022-06-29 17:49:43 +0300 Translations: Add Korean translation. Thanks to Seong-ho Cho and Jia Tan. 
po/LINGUAS | 1 + po/ko.po | 972 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 973 insertions(+) commit 65217eaf6bd195f3ef027d4ac55d57b7d133d69f Author: Lasse Collin Date: 2020-03-16 21:57:21 +0200 Translations: Rebuild cs.po to avoid incorrect fuzzy strings. "make dist" updates the .po files and the fuzzy strings would result in multiple very wrong translations. po/cs.po | 592 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 322 insertions(+), 270 deletions(-) commit e56ff423ee7af9e648e16b132f0d835d2cb4db26 Author: Lasse Collin Date: 2020-03-16 17:30:39 +0200 Translations: Add partial Danish translation. I made a few minor white space changes without getting them approved by the Danish translation team. po/LINGUAS | 1 + po/da.po | 896 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 897 insertions(+) commit 43e09c62e77cb8807d932c81de4decbdb956e184 Author: Lasse Collin Date: 2020-03-11 22:37:54 +0200 Translations: Add hu, zh_CN, and zh_TW. I made a few white space changes to these without getting them approved by the translation teams. (I tried to contact the hu and zh_TW teams but didn't succeed. I didn't contact the zh_CN team.) po/LINGUAS | 3 + po/hu.po | 985 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/zh_CN.po | 963 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/zh_TW.po | 956 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 2907 insertions(+) commit 982b6b198ae1ffe6093236dd8a3d03d1415b912b Author: Lasse Collin Date: 2020-03-11 14:33:30 +0200 Translations: Update vi.po to match the file from the TP. The translated strings haven't been updated but word wrapping is different. 
po/vi.po | 407 ++++++++++++++++++++++++++++----------------------------------- 1 file changed, 179 insertions(+), 228 deletions(-) commit 801f39691fc4abc6dd33d7653d498781b593f3eb Author: Lasse Collin Date: 2020-03-11 14:18:03 +0200 Translations: Add fi and pt_BR, and update de, fr, it, and pl. The German translation isn't identical to the file in the Translation Project but the changes (white space changes only) were approved by the translator Mario Blättermann. po/LINGUAS | 2 + po/de.po | 476 ++++++++++++++-------------- po/fi.po | 974 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/fr.po | 272 ++++++++-------- po/it.po | 479 ++++++++++++---------------- po/pl.po | 239 +++++++------- po/pt_BR.po | 1001 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 2697 insertions(+), 746 deletions(-) commit 311e4f85ede5d2f0bb71f3ad70b5b7db1b5adf33 Author: Lasse Collin Date: 2022-08-22 17:27:19 +0300 xz: Try to clarify --memlimit-mt-decompress vs. --memlimit-compress. src/xz/xz.1 | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) commit df23c31000283c00e5ef1ca32a0bc3bb757bd707 Author: Lasse Collin Date: 2022-08-22 16:46:18 +0300 CMake: Add liblzma tests. Thanks to Jia Tan for the patch. CMakeLists.txt | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) commit 02a777f9c422d3b5ec895078530bb1a2b6f7bdf5 Author: Lasse Collin Date: 2022-08-19 23:32:22 +0300 xz: Revise --info-memory output. The strings could be more descriptive but it's good to have some version of this committed now. --robot mode wasn't changed yet. src/xz/hardware.c | 32 ++++++++++++++++++++++++++------ src/xz/xz.1 | 1 + 2 files changed, 27 insertions(+), 6 deletions(-) commit f864f6d42eab57ea8ed82cc2dd19a03b51377442 Author: Lasse Collin Date: 2022-08-19 23:12:02 +0300 xz: Update the man page for threaded decompression and memlimits. 
This documents the changes made in commits 6c6da57ae2aa962aabde6892442227063d87e88c, cad299008cf73ec566f0662a9cf2b94f86a99659, and 898faa97287a756231c663a3ed5165672b417207. The --info-memory bit hasn't been finished yet even though it's already mentioned in this commit under --memlimit-mt-decompress and --threads. src/xz/xz.1 | 148 +++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 121 insertions(+), 27 deletions(-) commit d13bfcc0056617dd648f655a01653932fad7067f Author: Lasse Collin Date: 2022-08-18 17:49:16 +0300 Build: Include the CMake files in the distribution. This was supposed to be done in 2020 with the 5.2.5 release already but it was noticed only today. 5.2.5 and 5.2.6 even mention experimental CMake support in the NEWS entries. Thanks to Olivier B. for reporting the problem. Makefile.am | 2 ++ 1 file changed, 2 insertions(+) commit e66787bcfefdb93f19c974f895f65969a77937b0 Author: Lasse Collin Date: 2022-08-18 17:38:05 +0300 Windows: Fix broken liblzma.dll build with Visual Studio project files. The bug was introduced in 352ba2d69af2136bc814aa1df1a132559d445616 "Windows: Fix building of resource files when config.h isn't used." That commit fixed liblzma.dll build with CMake while keeping it working with Autotools on Windows but the VS project files were forgotten. I haven't tested these changes. Thanks to Olivier B. for reporting the bug and for the initial patch. windows/vs2013/liblzma_dll.vcxproj | 6 ++++++ windows/vs2017/liblzma_dll.vcxproj | 6 ++++++ windows/vs2019/liblzma_dll.vcxproj | 6 ++++++ 3 files changed, 18 insertions(+) commit c4e8e5fb311225b8b48d34157891a640b2535e0c Author: Lasse Collin Date: 2022-08-18 17:16:49 +0300 liblzma: Threaded decoder: Improve LZMA_FAIL_FAST when LZMA_FINISH is used. It will now return LZMA_DATA_ERROR (not LZMA_OK or LZMA_BUF_ERROR) if LZMA_FINISH is used and there isn't enough input to finish decoding the Block Header or the Block.
The use of LZMA_DATA_ERROR is simpler and less risky than LZMA_BUF_ERROR but this might be changed before 5.4.0. src/liblzma/api/lzma/container.h | 6 +++++ src/liblzma/common/stream_decoder_mt.c | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) commit 6dcf606e7efa2b259f0262f9e2f61e00116842d3 Author: Lasse Collin Date: 2022-08-12 18:31:47 +0300 Add NEWS for 5.2.6. NEWS | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) commit 413b86fcf8934fae5a004f378a9483d37d8fcaab Author: Lasse Collin Date: 2022-08-12 14:28:41 +0300 Add Jia Tan to AUTHORS. AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 352672732c346c546ff3c26d0605bc0ed1c8b7c7 Author: Lasse Collin Date: 2022-07-25 19:28:26 +0300 Build: Start the generated ChangeLog from around 5.2.0 instead of 5.0.0. This makes ChangeLog smaller. Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 6f6d11225d6016be2bbb55d845b66f6b04d048df Author: Lasse Collin Date: 2022-07-25 19:11:05 +0300 Translations: Change the copyright comment string to use with po4a. This affects the second line in po4a/xz-man.pot. The man pages of xzdiff, xzgrep, and xzmore are from GNU gzip and under GNU GPLv2+ while the rest of the man pages are in the public domain. po4a/update-po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 61f8ec804abdb4c5dac01e8ae9b90c7be58a5c24 Author: Jia Tan Date: 2022-07-25 18:30:05 +0300 liblzma: Refactor lzma_mf_is_supported() to use a switch-statement. src/liblzma/lz/lz_encoder.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) commit 4d80b463a1251aa22eabc87d2732fec13b1adda6 Author: Jia Tan Date: 2022-07-25 18:20:01 +0300 Build: Don't allow empty LIST in --enable-match-finders=LIST. It's enforced only when a match finder is needed, that is, when LZMA1 or LZMA2 encoder is enabled.
configure.ac | 4 ++++ 1 file changed, 4 insertions(+) commit 9cc721af5436908f2d5a828aebbc4050a32a3487 Author: Lasse Collin Date: 2022-07-24 13:27:48 +0300 xz: Update the man page that change to --keep will be in 5.2.6. src/xz/xz.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit b81bf0c7d1873e52a4086a9abb494471d652cb55 Author: Lasse Collin Date: 2022-07-19 23:23:54 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 340cf1ec3927767046b8293a49da3db4e393f426 Author: Nicholas Jackson Date: 2022-07-17 17:39:23 -0700 CMake: Add missing source file to liblzma build CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) commit d796b6d7fdb8b7238b277056cf9146cce25db604 Author: Lasse Collin Date: 2022-07-19 23:19:49 +0300 xzgrep man page: Document exit statuses. src/scripts/xzgrep.1 | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) commit 923bf96b55e5216a6c8df9d8331934f54784390e Author: Lasse Collin Date: 2022-07-19 23:13:24 +0300 xzgrep: Improve error handling, especially signals. xzgrep wouldn't exit on SIGPIPE or SIGQUIT when it clearly should have. It's quite possible that it's not perfect still but at least it's much better. If multiple exit statuses compete, now it tries to pick the largest value. Some comments were added. The exit status handling of signals is still broken if the shell uses values larger than 255 in $? to indicate that a process died due to a signal ***and*** their "exit" command doesn't take this into account. This seems to work well with the ksh and yash versions I tried. However, there is a report in gzip/zgrep that OpenSolaris 5.11 (not 5.10) has a problem with "exit" truncating the argument to 8 bits: https://debbugs.gnu.org/cgi/bugreport.cgi?bug=22900#25 Such a bug would break xzgrep but I didn't add a workaround at least for now. 5.11 is old and I don't know if the problem exists in modern descendants, or if the problem exists in other ksh implementations in use.
src/scripts/xzgrep.in | 72 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 19 deletions(-) commit a648978b20495b7aa4a8b029c5a810b5ad9d08ff Author: Lasse Collin Date: 2022-07-19 00:10:55 +0300 xzgrep: Make the fix for ZDI-CAN-16587 more robust. I don't know if this can make a difference in the real world but it looked kind of suspicious (what happens with sed implementations that cannot process very long lines?). At least this commit shouldn't make it worse. src/scripts/xzgrep.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit bd7b290f3fe4faeceb7d3497ed9bf2e6ed5e7dc5 Author: Lasse Collin Date: 2022-07-18 21:52:31 +0300 xzgrep: Use grep -H --label when available (GNU, *BSDs). It avoids the use of sed for prefixing filenames to output lines. Using sed for that is slower and prone to security bugs so now the sed method is only used as a fallback. This also fixes an actual bug: When grepping a binary file, GNU grep nowadays prints its diagnostics to stderr instead of stdout and thus the sed-method for prefixing the filename doesn't work. So with this commit grepping binary files gives reasonable output with GNU grep now. This was inspired by zgrep but the implementation is different. src/scripts/xzgrep.in | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) commit b56729af9f1a596e57aeefd7570d8d7dce5c9f52 Author: Lasse Collin Date: 2022-07-18 21:10:25 +0300 xzgrep: Use -e to specify the pattern to grep. Now we don't need the separate test for adding the -q option as it can be added directly in the two places where it's needed. src/scripts/xzgrep.in | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) commit bad61b5997e6647911974022bfb72f3d4818a594 Author: Lasse Collin Date: 2022-07-18 19:18:48 +0300 Scripts: Use printf instead of echo in a few places. It's a good habit as echo has some portability corner cases when the string contents can be anything.
src/scripts/xzdiff.in | 6 +++--- src/scripts/xzgrep.in | 4 ++-- src/scripts/xzless.in | 4 ++-- src/scripts/xzmore.in | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) commit 6a4a4a7d2667837dc824c26fcb19ed6ca5aff645 Author: Lasse Collin Date: 2022-07-17 21:36:25 +0300 xzgrep: Add more LC_ALL=C to avoid bugs with multibyte characters. Also replace one use of expr with printf. The rationale for LC_ALL=C was already mentioned in 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 that fixed a security issue. However, unrelated uses weren't changed in that commit yet. POSIX says that with sed and such tools one should use LC_ALL=C to ensure predictable behavior when strings contain byte sequences that aren't valid multibyte characters in the current locale. See under "Application usage" in here: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html With GNU sed invalid multibyte strings would work without this; it's documented in its Texinfo manual. Some other implementations aren't so forgiving. src/scripts/xzgrep.in | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) commit b48f9d615f2c2e8d2f6e253d0e48ee66d0652b68 Author: Lasse Collin Date: 2022-07-17 20:55:16 +0300 xzgrep: Fix parsing of certain options. Fix handling of "xzgrep -25 foo" (in GNU grep "grep -25 foo" is an alias for "grep -C25 foo"). xzgrep would treat "foo" as filename instead of as a pattern. This bug was fixed in zgrep in gzip in 2012. Add -E, -F, -G, and -P to the "no argument required" list. Add -X to "argument required" list. It is an intentionally-undocumented GNU grep option so this isn't an important option for xzgrep but it seems that other grep implementations (well, those that I checked) don't support -X so I hope this change is an improvement still. grep -d (grep --directories=ACTION) requires an argument. In contrast to zgrep, I kept -d in the "no argument required" list because it's not supported in xzgrep (or zgrep). 
This way "xzgrep -d" gives an error about option being unsupported instead of telling that it requires an argument. Both zgrep and xzgrep tell that it's unsupported if an argument is specified. Add comments. src/scripts/xzgrep.in | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) commit 2d2201bc6315deda4d43625aa510972467bd51d4 Author: Lasse Collin Date: 2022-07-14 20:33:05 +0300 Tests: Add the .lzma files to test_files.sh. tests/test_files.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) commit ce5549a591bf126300618879f5b24023351baff6 Author: Lasse Collin Date: 2022-07-14 19:37:42 +0300 Tests: Add .lzma test files. tests/files/README | 63 ++++++++++++++++----- tests/files/bad-too_big_size-with_eopm.lzma | Bin 0 -> 37 bytes tests/files/bad-too_small_size-without_eopm-1.lzma | Bin 0 -> 31 bytes tests/files/bad-too_small_size-without_eopm-2.lzma | Bin 0 -> 31 bytes tests/files/bad-too_small_size-without_eopm-3.lzma | Bin 0 -> 36 bytes tests/files/bad-unknown_size-without_eopm.lzma | Bin 0 -> 31 bytes tests/files/good-known_size-with_eopm.lzma | Bin 0 -> 37 bytes tests/files/good-known_size-without_eopm.lzma | Bin 0 -> 31 bytes tests/files/good-unknown_size-with_eopm.lzma | Bin 0 -> 37 bytes 9 files changed, 50 insertions(+), 13 deletions(-) commit 107c93ee5cad51a6ea0cee471209bfe8d76deaa3 Author: Lasse Collin Date: 2022-07-14 18:12:38 +0300 liblzma: Rename a variable and improve a comment. src/liblzma/lzma/lzma_decoder.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) commit 511feb5eadb988d641b025d597f4fac7502003b8 Author: Lasse Collin Date: 2022-07-13 22:24:41 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 9595a3119b9faf0ce01375329cad8bbf85c35ea2 Author: Lasse Collin Date: 2022-07-13 22:24:07 +0300 liblzma: Add optional autodetection of LZMA end marker. 
Turns out that this is needed for .lzma files as the spec in LZMA SDK says that end marker may be present even if the size is stored in the header. Such files are rare but exist in the real world. The code in liblzma is so old that the spec didn't exist in LZMA SDK back then and I had understood that such files weren't possible (the lzma tool in LZMA SDK didn't create such files). This modifies the internal API so that LZMA decoder can be told if EOPM is allowed even when the uncompressed size is known. It's allowed with .lzma and not with other uses. Thanks to Karl Beldan for reporting the problem. doc/lzma-file-format.txt | 11 +++- src/liblzma/common/alone_decoder.c | 2 +- src/liblzma/common/microlzma_decoder.c | 2 +- src/liblzma/lz/lz_decoder.c | 10 +++- src/liblzma/lz/lz_decoder.h | 8 +-- src/liblzma/lzma/lzma2_decoder.c | 2 +- src/liblzma/lzma/lzma_decoder.c | 99 ++++++++++++++++++++++++---------- 7 files changed, 95 insertions(+), 39 deletions(-) commit 0c0f8e9761eb6eaf199082cf144db7ac5f9d8cb2 Author: Lasse Collin Date: 2022-07-12 18:53:04 +0300 xz: Document the special memlimit case of 2000 MiB on MIPS32. See commit fc3d3a7296ef58bb799a73943636b8bfd95339f7. src/xz/xz.1 | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) commit d1bfa3dc703325ecd974167e864a8712fdfe936e Author: Jia Tan Date: 2022-07-01 21:19:26 +0800 Created script to generate code coverage reports. The script uses lcov and genhtml after running the tests to show the code coverage statistics. The script will create a coverage directory where it is run. It can be run both in and out of the source directory. .gitignore | 4 +++ tests/code_coverage.sh | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) commit 86a30b0255d8064169fabfd213d907016d2f9f2a Author: Jia Tan Date: 2022-06-16 17:32:19 +0300 Tests: Add more tests into test_check. 
tests/test_check.c | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++-- tests/tests.h | 23 +++++ 2 files changed, 295 insertions(+), 7 deletions(-) commit 82e30fed66a89706388a8c15dc954d84e63f38fa Author: Lasse Collin Date: 2022-06-16 15:02:57 +0300 Tests: Use char[][24] array for enum_strings_lzma_ret. Array of pointers to short strings is a bit pointless here and now it's fully const. tests/tests.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5ba9459e6c4a29f6870ca78ce8ac6e519d59c41e Author: Lasse Collin Date: 2022-06-16 14:12:14 +0300 Tests: tuktest.h: Add tuktest_error_impl to help with error conditions. tests/tuktest.h | 72 +++++++++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 40 deletions(-) commit b339892668da20aea22a93668c82b87a38e4a97f Author: Lasse Collin Date: 2022-06-16 13:29:59 +0300 Tests: tuktest.h: Rename file_from_* and use tuktest_malloc there. tests/test_bcj_exact_size.c | 4 +--- tests/tuktest.h | 52 +++++++++++++++++++++------------------------ 2 files changed, 25 insertions(+), 31 deletions(-) commit d8b63a0ad68d1c461eb373466679ebc41fbc207d Author: Lasse Collin Date: 2022-06-16 13:08:19 +0300 Tests: tuktest.h: Add malloc wrapper with automatic freeing. tests/tuktest.h | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) commit 1d51536a4b19a8fae768f8eb462fc2238cb36d53 Author: Lasse Collin Date: 2022-06-16 11:47:37 +0300 Tests: tuktest.h: Move a function. tests/tuktest.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) commit 70c7555f6403553ee35539e869de0025592d8564 Author: Lasse Collin Date: 2022-06-14 22:21:15 +0300 Tests: test_vli: Remove an invalid test-assertion. lzma_vli is unsigned so trying a signed value results in a compiler warning from -Wsign-conversion. (lzma_vli)-1 equals to LZMA_VLI_UNKNOWN anyway which is the next assertion. 
tests/test_vli.c | 2 -- 1 file changed, 2 deletions(-) commit 154b73c5a1092c3f785e01666b564ad7ff1be555 Author: Lasse Collin Date: 2022-06-14 22:10:10 +0300 Tests: test_vli: Add const where appropriate. tests/test_vli.c | 53 ++++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 25 deletions(-) commit 0354d6cce3ff98ea6f927107baf216253f6ce2bb Author: Jia Tan Date: 2022-06-13 20:27:03 +0800 Added vli tests to .gitignore .gitignore | 1 + 1 file changed, 1 insertion(+) commit a08f5ccf6bdc20ef70e41f6f3321618ef146f96e Author: Jia Tan Date: 2022-06-12 11:31:40 +0800 Created tests for all functions exported in vli.h Achieved 100% code coverage vli_encoder.c, vli_decoder.c, and vli_size.c tests/Makefile.am | 4 +- tests/test_vli.c | 308 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 311 insertions(+), 1 deletion(-) commit 1e3eb61815a91c0a1bfbb802e2d95593f523491f Author: jiat75 Date: 2022-06-03 21:24:54 +0800 Added parallel test artifacts to .gitignore .gitignore | 2 ++ 1 file changed, 2 insertions(+) commit 00e3613f1212eaa84e721585fdb4de6967cf2476 Author: Lasse Collin Date: 2022-06-14 21:29:21 +0300 Tests: Use good-1-empty-bcj-lzma2.xz in test_bcj_exact_size. It's much nicer this way so that the test data isn't a hardcoded table inside the C file. tests/test_bcj_exact_size.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) commit 86bab755be252bfd3e0a9aee8e7b83a9bbb23ed0 Author: Lasse Collin Date: 2022-06-14 21:26:13 +0300 Tests: Add file reading helpers to tuktest.h. tests/tuktest.h | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 7 deletions(-) commit 83d2337b72dbf391c6f3b41889eea99e51679105 Author: Lasse Collin Date: 2022-06-14 18:21:57 +0300 Tests: tuktest.h: Move a printf from a macro to a helper function. 
tests/tuktest.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) commit f9e8176ea7d520797a2db2d49a5a632c285674a8 Author: Lasse Collin Date: 2022-06-14 17:20:49 +0300 Tests: Add test file good-1-empty-bcj-lzma2.xz. This is from test_bcj_exact_size.c. It's good to have it as a standalone file. tests/files/README | 5 +++++ tests/files/good-1-empty-bcj-lzma2.xz | Bin 0 -> 52 bytes 2 files changed, 5 insertions(+) commit aa75c5563a760aea3aa23d997d519e702e82726b Author: Jia Tan Date: 2022-06-10 21:35:18 +0800 Tests: Created tests for hardware functions. Created tests for all API functions exported in src/liblzma/api/lzma/hardware.h. The tests are fairly trivial but are helpful because they will inform users if their machines cannot support these functions. They also improve the code coverage metrics. .gitignore | 1 + tests/Makefile.am | 2 ++ tests/test_hardware.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) commit 5c8ffdca20813939818843476fb212dfae8838a3 Author: Lasse Collin Date: 2022-06-02 21:01:45 +0300 Tests: Convert test_check to tuktest. Thanks to Jia Tan for help with all the tests. tests/test_check.c | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) commit faf5ff8899d539b4dcd2a7e5280cb820a4746c86 Author: Lasse Collin Date: 2022-06-02 20:31:03 +0300 Tests: Convert test_block_header to tuktest. tests/test_block_header.c | 89 +++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 37 deletions(-) commit 754d39fbebee3782258d42f154a223d3c5770ec7 Author: Lasse Collin Date: 2022-06-02 20:28:23 +0300 Tests: Convert test_bcj_exact_size to tuktest. The compress() and decompress() functions were merged because the latter depends on the former so they need to be a single test case.
tests/test_bcj_exact_size.c | 75 +++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 34 deletions(-) commit 96da21470f9570cd08286906a050a7c22631775b Author: Lasse Collin Date: 2022-06-02 20:27:00 +0300 Tests: Include tuktest.h in tests.h. This breaks -Werror because none of the tests so far use tuktest.h and thus there are warnings about unused variables and functions. tests/tests.h | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) commit df71ba1c991f60c3269aaadd398247e632714626 Author: Lasse Collin Date: 2022-06-02 20:25:21 +0300 Tests: Add tuktest.h mini-test-framework. tests/Makefile.am | 1 + tests/tuktest.h | 752 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 753 insertions(+) commit 4773608554d1b684a05ff9c1d879cf5c42266d33 Author: Lasse Collin Date: 2022-05-23 21:31:36 +0300 Build: Enable Automake's parallel test harness. It has been the default for quite some time already and the old serial harness isn't discouraged. The downside is that with parallel tests one cannot print progress info or other diagnostics to the terminal; all output from the tests will be in the log files only. But now that the compression tests are separated the parallel tests will speed things up. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 9a6dd6d46f7f256a5573e5d874c1052033ed7b05 Author: Lasse Collin Date: 2022-05-23 21:17:47 +0300 Tests: Split test_compress.sh into separate test unit for each file. test_compress.sh now takes one command line argument: a filename to be tested. If it begins with "compress_generated_" the file will be created with create_compress_files. This will allow parallel execution of the slow tests. 
tests/Makefile.am | 11 +++- tests/test_compress.sh | 91 +++++++++++++++++++--------------- tests/test_compress_generated_abc | 3 ++ tests/test_compress_generated_random | 3 ++ tests/test_compress_generated_text | 3 ++ tests/test_compress_prepared_bcj_sparc | 3 ++ tests/test_compress_prepared_bcj_x86 | 3 ++ 7 files changed, 77 insertions(+), 40 deletions(-) commit c7758ac9c734707514dd34f254173ebac5eea7f8 Author: Lasse Collin Date: 2022-05-23 20:32:49 +0300 Test: Make create_compress_files.c a little more flexible. If a command line argument is given, then only the test file of that type is created. It's quite dumb in sense that unknown names don't give an error but it's good enough here. Also use EXIT_FAILURE instead of 1 as exit status for errors. tests/create_compress_files.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) commit 4a8e4a7b0ad4b03c0ac6862716c3457452cdaf8c Author: Lasse Collin Date: 2022-05-23 20:17:42 +0300 Tests: Remove unneeded commented lines from test_compress.sh. tests/test_compress.sh | 13 ------------- 1 file changed, 13 deletions(-) commit 2ee50d150ee009f36135540b459e6ff328549725 Author: Lasse Collin Date: 2022-05-23 20:16:00 +0300 Tests: Remove progress indicator from test_compress.sh. It will be useless with Automake's parallel tests. tests/test_compress.sh | 9 --------- 1 file changed, 9 deletions(-) commit 2ce4f36f179a81d0c6e182a409f363df759d1ad0 Author: Lasse Collin Date: 2022-05-23 19:37:18 +0300 liblzma: Silence a warning. The actual initialization is done via mythread_sync and seems that GCC doesn't necessarily see that it gets initialized there. src/liblzma/common/stream_decoder_mt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5d8f3764ef43c35910e6d7003c0900a961ef6544 Author: Lasse Collin Date: 2022-04-14 20:53:16 +0300 xz: Fix build with --disable-threads. 
src/xz/hardware.c | 4 ++++ 1 file changed, 4 insertions(+) commit 1d592897278b172d8549aa29c3a1f3a4f432a9b9 Author: Lasse Collin Date: 2022-04-14 14:50:17 +0300 xz: Change the cap of the default -T0 memlimit for 32-bit xz. The SIZE_MAX / 3 was 1365 MiB. 1400 MiB gives a little more room and it looks like a round (artificial) number in --info-memory once --info-memory is made to display it. Also, using #if avoids useless code on 64-bit builds. src/xz/hardware.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit c77fe55ddb7752ed0fec46967c5ec9a72632ea0c Author: Lasse Collin Date: 2022-04-14 14:20:46 +0300 xz: Add a default soft memory usage limit for --threads=0. This is a soft limit in sense that it only affects the number of threads. It never makes xz fail and it never makes xz change settings that would affect the compressed output. The idea is to make -T0 have more reasonable behavior when the system has very many cores or when memory-hungry compression options are used. This also helps with 32-bit xz, preventing it from running out of address space. The downside of this commit is that now the number of threads might become too low compared to what the user expected. I hope this to be an acceptable compromise as the old behavior has been a source of well-argued complaints for a long time. src/xz/coder.c | 28 ++++++++++++++++++++++++++-- src/xz/hardware.c | 38 +++++++++++++++++++++++++++++--------- src/xz/hardware.h | 27 +++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 11 deletions(-) commit 0adc13bfe32c14f3e4c6ce9f2d4fdf4112ab53f4 Author: Lasse Collin Date: 2022-04-14 12:59:09 +0300 xz: Make -T0 use multithreaded mode on single-core systems. The main problem with the old behavior is that the compressed output is different on single-core systems vs. multicore systems. This commit fixes it by making -T0 one thread in multithreaded mode on single-core systems. The downside of this is that it uses more memory.
However, if --memlimit-compress is used, xz can (thanks to the previous commit) drop to the single-threaded mode still. src/xz/coder.c | 18 +++++++++--------- src/xz/hardware.c | 14 ++++++++++++++ src/xz/hardware.h | 4 ++++ 3 files changed, 27 insertions(+), 9 deletions(-) commit 898faa97287a756231c663a3ed5165672b417207 Author: Lasse Collin Date: 2022-04-14 12:38:00 +0300 xz: Changes to --memlimit-compress and --no-adjust. In single-threaded mode, --memlimit-compress can make xz scale down the LZMA2 dictionary size to meet the memory usage limit. This obviously affects the compressed output. However, if xz was in threaded mode, --memlimit-compress could make xz reduce the number of threads but it wouldn't make xz switch from multithreaded mode to single-threaded mode or scale down the LZMA2 dictionary size. This seemed illogical and there was even a "FIXME?" about it. Now --memlimit-compress can make xz switch to single-threaded mode if one thread in multithreaded mode uses too much memory. If memory usage is still too high, then the LZMA2 dictionary size can be scaled down too. The option --no-adjust was also changed so that it no longer prevents xz from scaling down the number of threads as that doesn't affect compressed output (only performance). After this commit --no-adjust only prevents adjustments that affect compressed output, that is, with --no-adjust xz won't switch from multithreaded mode to single-threaded mode and won't scale down the LZMA2 dictionary size. The man page wasn't updated yet. src/xz/coder.c | 63 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 20 deletions(-) commit cad299008cf73ec566f0662a9cf2b94f86a99659 Author: Lasse Collin Date: 2022-04-11 22:20:49 +0300 xz: Add --memlimit-mt-decompress along with a default limit value. --memlimit-mt-decompress allows specifying the limit for multithreaded decompression. This matches memlimit_threading in liblzma. 
This limit can only affect the number of threads being used; it will never prevent xz from decompressing a file. The old --memlimit-decompress option is still used at the same time. If the value of --memlimit-decompress (the default value or one specified by the user) is less than the value of --memlimit-mt-decompress , then --memlimit-mt-decompress is reduced to match --memlimit-decompress. Man page wasn't updated yet. src/xz/args.c | 24 +++++++++++++++------- src/xz/coder.c | 34 ++++++++++--------------------- src/xz/hardware.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----- src/xz/hardware.h | 17 ++++++++++------ src/xz/message.c | 4 +++- 5 files changed, 97 insertions(+), 42 deletions(-) commit fe87b4cd5364f5bbb6a75a0299f1500c852d7c9a Author: Lasse Collin Date: 2022-04-06 23:11:59 +0300 liblzma: Threaded decoder: Improve setting of pending_error. It doesn't need to be done conditionally. The comments try to explain it. src/liblzma/common/stream_decoder_mt.c | 51 +++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) commit 90621da7f6e1bfd6d91d60415eae04b2bca274c2 Author: Lasse Collin Date: 2022-04-06 12:39:49 +0300 liblzma: Add a new flag LZMA_FAIL_FAST for threaded decoder. In most cases if the input file is corrupt the application won't care about the uncompressed content at all. With this new flag the threaded decoder will return an error as soon as any thread has detected an error; it won't wait to copy out the data before the location of the error. I don't plan to use this in xz to keep the behavior consistent between single-threaded and multi-threaded modes. 
src/liblzma/api/lzma/container.h | 25 ++++++++++++++++- src/liblzma/common/common.h | 7 +++-- src/liblzma/common/stream_decoder_mt.c | 50 +++++++++++++++++++--------------- 3 files changed, 56 insertions(+), 26 deletions(-) commit 64b6d496dc815a176d8307f418f6834a26783484 Author: Lasse Collin Date: 2022-04-05 12:24:57 +0300 liblzma: Threaded decoder: Always wait for output if LZMA_FINISH is used. This makes the behavior consistent with the single-threaded decoder when handling truncated .xz files. Thanks to Jia Tan for finding this issue. src/liblzma/common/stream_decoder_mt.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) commit e671bc8828b9c0c5406c3a22c541301d0eb54518 Author: Lasse Collin Date: 2022-04-02 21:49:59 +0300 liblzma: Threaded decoder: Support zpipe.c-style decoding loop. This makes it possible to call lzma_code() in a loop that only reads new input when lzma_code() didn't fill the output buffer completely. That isn't the calling style suggested by the liblzma example program 02_decompress.c so perhaps the usefulness of this feature is limited. Also, it is possible to write such a loop so that it works with the single-threaded decoder but not with the threaded decoder even after this commit, or so that it works only if lzma_mt.timeout = 0. The zlib tutorial is a well-known example of a loop where more input is read only when output isn't full. Porting this as is to liblzma would work with the single-threaded decoder (if LZMA_CONCATENATED isn't used) but it wouldn't work with threaded decoder even after this commit because the loop assumes that no more output is possible when it cannot read more input ("if (strm.avail_in == 0) break;"). This cannot be fixed at liblzma side; the loop has to be modified at least a little. I'm adding this in any case because the actual code is simple and short and should have no harmful side-effects in other situations. 
src/liblzma/common/stream_decoder_mt.c | 77 +++++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 10 deletions(-) commit 2ba8173e27be4793edb46497e499ac2ae753a316 Author: Lasse Collin Date: 2022-03-31 00:05:07 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 Author: Lasse Collin Date: 2022-03-29 19:19:12 +0300 xzgrep: Fix escaping of malicious filenames (ZDI-CAN-16587). Malicious filenames can make xzgrep to write to arbitrary files or (with a GNU sed extension) lead to arbitrary code execution. xzgrep from XZ Utils versions up to and including 5.2.5 are affected. 5.3.1alpha and 5.3.2alpha are affected as well. This patch works for all of them. This bug was inherited from gzip's zgrep. gzip 1.12 includes a fix for zgrep. The issue with the old sed script is that with multiple newlines, the N-command will read the second line of input, then the s-commands will be skipped because it's not the end of the file yet, then a new sed cycle starts and the pattern space is printed and emptied. So only the last line or two get escaped. One way to fix this would be to read all lines into the pattern space first. However, the included fix is even simpler: All lines except the last line get a backslash appended at the end. To ensure that shell command substitution doesn't eat a possible trailing newline, a colon is appended to the filename before escaping. The colon is later used to separate the filename from the grep output so it is fine to add it here instead of a few lines later. The old code also wasn't POSIX compliant as it used \n in the replacement section of the s-command. Using \ is the POSIX compatible method. LC_ALL=C was added to the two critical sed commands. POSIX sed manual recommends it when using sed to manipulate pathnames because in other locales invalid multibyte sequences might cause issues with some sed implementations. 
In case of GNU sed, these particular sed scripts wouldn't have such problems but some other scripts could have, see: info '(sed)Locale Considerations' This vulnerability was discovered by: cleemy desu wayo working with Trend Micro Zero Day Initiative Thanks to Jim Meyering and Paul Eggert discussing the different ways to fix this and for coordinating the patch release schedule with gzip. src/scripts/xzgrep.in | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) commit bd93b776c1bd15e90661033c918cdeb354dbcc38 Author: Lasse Collin Date: 2022-03-26 01:02:44 +0200 liblzma: Fix a deadlock in threaded decoder. If a worker thread has consumed all input so far and it's waiting on thr->cond and then the main thread enables partial update for that thread, the code used to deadlock. This commit allows one dummy decoding pass to occur in this situation which then also does the partial update. As part of the fix, this moves thr->progress_* updates to avoid the second thr->mutex locking. Thanks to Jia Tan for finding, debugging, and reporting the bug. src/liblzma/common/stream_decoder_mt.c | 71 +++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 19 deletions(-) commit e0394e94230f208682ac1e1f4c41f22f9ad79916 Author: Lasse Collin Date: 2022-03-23 16:34:00 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 487c77d48760564b1949c5067630b675b87be4de Author: Lasse Collin Date: 2022-03-23 16:28:55 +0200 liblzma: Threaded decoder: Don't stop threads on LZMA_TIMED_OUT. LZMA_TIMED_OUT is not an error and thus stopping threads on LZMA_TIMED_OUT breaks the decoder badly. Thanks to Jia Tan for finding the bug and for the patch. src/liblzma/common/stream_decoder_mt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 6c6da57ae2aa962aabde6892442227063d87e88c Author: Lasse Collin Date: 2022-03-07 00:36:16 +0200 xz: Add initial support for threaded decompression. 
If threading support is enabled at build time, this will use lzma_stream_decoder_mt() even for single-threaded mode. With memlimit_threading=0 the behavior should be identical. This needs some work like adding --memlimit-threading=LIMIT. The original patch from Sebastian Andrzej Siewior included a method to get currently available RAM on Linux. It might be one way to go but as it is Linux-only, the available-RAM approach needs work for portability or using a fallback method on other OSes. The man page wasn't updated yet. src/xz/coder.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) commit 4cce3e27f529af33e0e7749a8cbcec59954946b5 Author: Lasse Collin Date: 2022-03-06 23:36:20 +0200 liblzma: Add threaded .xz decompressor. I realize that this is about a decade late. Big thanks to Sebastian Andrzej Siewior for the original patch. I made a bunch of smaller changes but after a while quite a few things got rewritten. So any bugs in the commit were created by me. src/liblzma/api/lzma/container.h | 90 +- src/liblzma/common/Makefile.inc | 5 + src/liblzma/common/common.h | 4 + src/liblzma/common/stream_decoder_mt.c | 1814 ++++++++++++++++++++++++++++++++ src/liblzma/liblzma.map | 1 + 5 files changed, 1907 insertions(+), 7 deletions(-) commit 717631b9788dc9c100ee0c87d3c14a2782638ff4 Author: Lasse Collin Date: 2022-03-06 16:54:23 +0200 liblzma: Fix docs: lzma_block_decoder() cannot return LZMA_UNSUPPORTED_CHECK. If Check is unsupported, it will be silently ignored. It's the caller's job to handle it. src/liblzma/api/lzma/block.h | 3 --- 1 file changed, 3 deletions(-) commit 1a4bb97a00936535e30ac61945aeee38882b5d1a Author: Lasse Collin Date: 2022-03-06 16:41:19 +0200 liblzma: Add new output queue (lzma_outq) features. Add lzma_outq_clear_cache2() which may leave one buffer allocated in the cache. Add lzma_outq_outbuf_memusage() to get the memory needed for a single lzma_outbuf. This is now used internally in outqueue.c too. 
Track both the total amount of memory allocated and the amount of memory that is in active use (not in cache). In lzma_outbuf, allow storing the current input position that matches the current output position. This way the main thread can notice when no more output is possible without first providing more input. Allow specifying return code for lzma_outq_read() in a finished lzma_outbuf. src/liblzma/common/outqueue.c | 43 +++++++++++++++++++++++++++++++++++------- src/liblzma/common/outqueue.h | 44 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 8 deletions(-) commit ddbc6f58c2de388eed24cd7ea91b523d397da5f4 Author: Lasse Collin Date: 2022-03-06 15:18:58 +0200 liblzma: Index hash: Change return value type of hash_append() to void. src/liblzma/common/index_hash.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) commit 20e7a33e2d59c6a814447d3991f21e2702174b20 Author: Lasse Collin Date: 2022-02-22 03:42:57 +0200 liblzma: Minor addition to lzma_vli_size() API doc. Thanks to Jia Tan. src/liblzma/api/lzma/vli.h | 2 ++ 1 file changed, 2 insertions(+) commit 4f78f5fcf63592f2d77e921cfe0d5de300867374 Author: Lasse Collin Date: 2022-02-22 02:04:18 +0200 liblzma: Check the return value of lzma_index_append() in threaded encoder. If lzma_index_append() failed (most likely memory allocation failure) it could have gone unnoticed and the resulting .xz file would have an incorrect Index. Decompressing such a file would produce the correct uncompressed data but then an error would occur when verifying the Index field. src/liblzma/common/stream_encoder_mt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) commit 5313ad66b40aab822ddca3e9905254cb99a4080d Author: Lasse Collin Date: 2022-02-22 01:37:39 +0200 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit 865e0a3689a25a7ee8eecae1a34c1775e3aa676e Author: Ed Maste Date: 2022-02-11 15:25:46 +0000 liblzma: Use non-executable stack on FreeBSD as on Linux src/liblzma/check/crc32_x86.S | 4 ++-- src/liblzma/check/crc64_x86.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) commit 1c9a5786d206b4abc8e427326651c8174baea753 Author: Lasse Collin Date: 2022-02-20 20:36:27 +0200 liblzma: Make Block decoder catch certain types of errors better. Now it limits the input and output buffer sizes that are passed to a raw decoder. This way there's no need to check if the sizes can grow too big or overflow when updating Compressed Size and Uncompressed Size counts. This also means that a corrupt file cannot cause the raw decoder to process useless extra input or output that would exceed the size info in Block Header (and thus cause LZMA_DATA_ERROR anyway). More importantly, now the size information is verified more carefully in case raw decoder returns LZMA_OK. This doesn't really matter with the current single-threaded .xz decoder as the errors would be detected slightly later anyway. But this helps avoiding corner cases in the upcoming threaded decompressor, and it might help other Block decoder uses outside liblzma too. The test files bad-1-lzma2-{9,10,11}.xz test these conditions. With the single-threaded .xz decoder the only difference is that LZMA_DATA_ERROR is detected in a different place now. src/liblzma/common/block_decoder.c | 79 ++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 25 deletions(-) commit 555de11873eb00c9b94a8be70645db502e5a9dbd Author: Lasse Collin Date: 2022-02-20 19:38:55 +0200 Tests: Add bad-1-lzma2-11.xz.
tests/files/README | 5 +++++ tests/files/bad-1-lzma2-11.xz | Bin 0 -> 64 bytes 2 files changed, 5 insertions(+) commit f0da507f22e7f4e3edb75b45b74d344244ca03fb Author: Lasse Collin Date: 2022-02-18 18:51:10 +0200 Translations: Fix po4a failure with the French man page translations. Thanks to Mario Blättermann for the patch. po4a/fr_FR.po | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) commit f7711d228c3c32395460c82498c60a9f730d0239 Author: Lasse Collin Date: 2022-02-07 01:14:37 +0200 Translations: Add French translation of man pages. This matches xz-utils 5.2.5-2 in Debian. The translation was done by "bubu", proofread by the debian-l10n-french mailing list contributors, and submitted to me on the xz-devel mailing list by Jean-Pierre Giraud. Thanks to everyone! po4a/fr_FR.po | 3541 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po4a/po4a.conf | 2 +- 2 files changed, 3542 insertions(+), 1 deletion(-) commit 6468f7e41a8e9c611e4ba8d34e2175c5dacdbeb4 Author: jiat75 Date: 2022-01-28 20:47:55 +0800 liblzma: Add NULL checks to LZMA and LZMA2 properties encoders. Previously lzma_lzma_props_encode() and lzma_lzma2_props_encode() assumed that the options pointers must be non-NULL because with these filters the API says it must never be NULL. It is good to do these checks anyway. src/liblzma/lzma/lzma2_encoder.c | 3 +++ src/liblzma/lzma/lzma_encoder.c | 3 +++ 2 files changed, 6 insertions(+) commit 2523c30705f49eabd27b854aa656ae87cc224808 Author: Lasse Collin Date: 2022-02-06 23:19:32 +0200 liblzma: Fix uint64_t vs. size_t confusion. This broke 32-bit builds due to a pointer type mismatch. This bug was introduced with the output-size-limited encoding in 625f4c7c99b2fcc4db9e7ab2deb4884790e2e17c. Thanks to huangqinjin for the bug report.
src/liblzma/rangecoder/range_encoder.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) commit 2bd36c91d03e03b31a4f12fd0afc100ae32d66e2 Author: huangqinjin Date: 2021-12-13 20:49:21 +0800 CMake: Keep compatible with Windows 95 for 32-bit build. CMakeLists.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) commit 2024fbf2794885277d05378d40b2b8015a7c3b40 Author: Lasse Collin Date: 2021-11-13 21:04:05 +0200 xzgrep: Update man page timestamp. src/scripts/xzgrep.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 400e7a239a53282cedaad927a41f3463d7f542e5 Author: Lasse Collin Date: 2021-11-13 18:23:24 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 3a512c7787b2642ca946f4adc6e9a0a5d9b0d5a0 Author: Ville Skyttä Date: 2021-11-13 10:11:57 +0200 xzgrep: use `grep -E/-F` instead of `egrep` and `fgrep` `egrep` and `fgrep` have been deprecated in GNU grep since 2007, and in current post 3.7 Git they have been made to emit obsolescence warnings: https://git.savannah.gnu.org/cgit/grep.git/commit/?id=a9515624709865d480e3142fd959bccd1c9372d1 src/scripts/xzgrep.1 | 8 ++++---- src/scripts/xzgrep.in | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) commit edf525e2b1840dcaf377df472c67d8f11f8ace1b Author: Lasse Collin Date: 2021-10-28 23:02:11 +0300 Bump the version number for 5.3.2alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma.map | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit ea8c948655a86290524efe59cff067e06a886709 Author: Lasse Collin Date: 2021-10-28 22:59:52 +0300 Add NEWS for 5.3.2alpha. NEWS | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) commit 52435f302f4724157ec50b4210cbe42b285c3cbc Author: Lasse Collin Date: 2021-10-27 23:27:48 +0300 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit f2aea1d5a504b2021bf47a238390e4f12bdd518d Author: Lasse Collin Date: 2021-10-27 23:23:11 +0300 xz: Change the coding style of the previous commit. It isn't any better now but it's consistent with the rest of the code base. src/xz/file_io.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) commit 892b16cc282f5b4e1c49871b326f4db25c5b4d81 Author: Alexander Bluhm Date: 2021-10-05 23:33:16 +0200 xz: Avoid fchown(2) failure. OpenBSD does not allow to change the group of a file if the user does not belong to this group. In contrast to Linux, OpenBSD also fails if the new group is the same as the old one. Do not call fchown(2) in this case, it would change nothing anyway. This fixes an issue with Perl Alien::Build module. https://github.com/PerlAlien/Alien-Build/issues/62 src/xz/file_io.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) commit 2b509c868cae3988bf21cd693fbf9021cdc85628 Author: Lasse Collin Date: 2021-09-17 17:31:11 +0300 liblzma: Fix liblzma.map for the lzma_microlzma_* symbols. This should have been part of d267d109c370a40b502e73f8664b154b15e4f253. Thanks to Gao Xiang. src/liblzma/liblzma.map | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit cacb06a954b58255dfc084a0bc9708f43a0fd6d6 Author: Lasse Collin Date: 2021-09-09 22:21:07 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 6928aac9da6ba612780b9f72ba1d6ecbe1e8b54e Author: Lasse Collin Date: 2021-09-09 21:41:51 +0300 liblzma: Use _MSVC_LANG to detect when "noexcept" can be used with MSVC. By default, MSVC always sets __cplusplus to 199711L. The real C++ standard version is available in _MSVC_LANG (or one could use /Zc:__cplusplus to set __cplusplus correctly). Fixes . Thanks to Dan Weiss. 
src/liblzma/api/lzma.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit d267d109c370a40b502e73f8664b154b15e4f253 Author: Lasse Collin Date: 2021-09-05 20:38:12 +0300 liblzma: Rename EROFS LZMA to MicroLZMA. It still exists primarily for EROFS but MicroLZMA is a more generic name (that hopefully doesn't clash with something that already exists). src/liblzma/api/lzma/container.h | 33 +++++++++++++--------- src/liblzma/common/Makefile.inc | 4 +-- .../{erofs_decoder.c => microlzma_decoder.c} | 32 ++++++++++----------- .../{erofs_encoder.c => microlzma_encoder.c} | 30 ++++++++++---------- 4 files changed, 52 insertions(+), 47 deletions(-) commit 3247e95115acb95bc27f41e8cf4501db5b0b4309 Author: Lasse Collin Date: 2021-06-04 19:02:38 +0300 xzdiff: Update the man page about the exit status. This was forgotten from 194029ffaf74282a81f0c299c07f73caca3232ca. src/scripts/xzdiff.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 96f5a28a46fc93ac4e296808ac0f8631d05498bc Author: Lasse Collin Date: 2021-06-04 18:52:48 +0300 xzless: Fix less(1) version detection when it contains a dot. Sometimes the version number from "less -V" contains a dot, sometimes not. xzless failed to detect the version number when it does contain a dot. This fixes it. Thanks to nick87720z for reporting this. Apparently it had been reported here in 2013. src/scripts/xzless.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5fb5212d816addbc523d0798cb482fdd0484f8fa Author: Lasse Collin Date: 2021-04-11 19:58:10 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit fc3d3a7296ef58bb799a73943636b8bfd95339f7 Author: Ivan A. Melnikov Date: 2021-04-09 11:45:10 +0300 Reduce maximum possible memory limit on MIPS32 Due to architectural limitations, address space available to a single userspace process on MIPS32 is limited to 2 GiB, not 4, even on systems that have more physical RAM -- e.g.
64-bit systems with 32-bit userspace, or systems that use XPA (an extension similar to x86's PAE). So, for MIPS32, we have to impose stronger memory limits. I've chosen 2000MiB to give the process some headroom. src/xz/hardware.c | 6 ++++++ 1 file changed, 6 insertions(+) commit e7da44d5151e21f153925781ad29334ae0786101 Author: Lasse Collin Date: 2021-02-13 23:31:27 +0200 CMake: Use interface library for better FindLibLZMA compatibility. https://www.mail-archive.com/xz-devel@tukaani.org/msg00446.html Thanks to Markus Rickert. CMakeLists.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) commit a61dd82ada39030f41b4ffca9ea551714908bedc Author: Lasse Collin Date: 2021-01-30 18:36:04 +0200 CMake: Try to improve compatibility with the FindLibLZMA module. The naming conflict with FindLibLZMA module gets worse. Not avoiding it in the first place was stupid. Normally find_package(LibLZMA) will use the module and find_package(liblzma 5.2.5 REQUIRED CONFIG) will use the config file even with a case insensitive file system. However, if CMAKE_FIND_PACKAGE_PREFER_CONFIG is TRUE and the file system is case insensitive, find_package(LibLZMA) will find our liblzma config file instead of using FindLibLZMA module. One big problem with this is that FindLibLZMA uses LibLZMA::LibLZMA and we use liblzma::liblzma as the target name. With target names CMake happens to be case sensitive. To workaround this, this commit adds add_library(LibLZMA::LibLZMA ALIAS liblzma::liblzma) to the config file. Then both spellings work. To make the behavior consistent between case sensitive and insensitive file systems, the config and related files are renamed from liblzmaConfig.cmake to liblzma-config.cmake style. With this style CMake looks for lowercase version of the package name so find_package(LiBLzmA 5.2.5 REQUIRED CONFIG) will work to find our config file. 
There are other differences between our config file and FindLibLZMA so it's still possible that things break for reasons other than the spelling of the target name. Hopefully those situations aren't too common. When the config file is available, it should always give as good or better results as FindLibLZMA so this commit doesn't affect the recommendation to use find_package(liblzma 5.2.5 REQUIRED CONFIG) which explicitly avoids FindLibLZMA. Thanks to Markus Rickert. CMakeLists.txt | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) commit 5b7bc1b8ae766a76710ca1b99f909cf52c697f05 Author: Lasse Collin Date: 2021-01-29 21:19:43 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 6c6f0db340dcb8bb424411cedba713405d55f6b8 Author: Lasse Collin Date: 2021-01-29 21:19:08 +0200 liblzma: Fix unitialized variable. This was introduced two weeks ago in the commit 625f4c7c99b2fcc4db9e7ab2deb4884790e2e17c. Thanks to Nathan Moinvaziri. src/liblzma/lzma/lzma_encoder.c | 1 + 1 file changed, 1 insertion(+) commit bb1d5c1fdd30550d4221ecd336e0be1206132a5c Author: Lasse Collin Date: 2021-01-24 22:32:41 +0200 Tests: Add bad-1-lzma2-10.xz and also modify -9.xz. tests/files/README | 11 +++++++++-- tests/files/bad-1-lzma2-10.xz | Bin 0 -> 60 bytes tests/files/bad-1-lzma2-9.xz | Bin 72 -> 72 bytes 3 files changed, 9 insertions(+), 2 deletions(-) commit 6b8abc84a5469792e0355d0bfc0784d41cfdfef7 Author: Lasse Collin Date: 2021-01-24 19:22:35 +0200 liblzma: Fix a wrong comment in stream_encoder_mt.c. src/liblzma/common/stream_encoder_mt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) commit 939fc5ed654aac25fe0c8684b2df8dbeadb2de1e Author: Lasse Collin Date: 2021-01-24 18:51:51 +0200 Tests: Add bad-1-lzma2-9.xz. 
tests/files/README | 4 ++++ tests/files/bad-1-lzma2-9.xz | Bin 0 -> 72 bytes 2 files changed, 4 insertions(+) commit fdd30032f8531ac89519b48c21d810ecf06825f6 Author: Lasse Collin Date: 2021-01-24 17:02:00 +0200 Tests: Add bad-1-check-crc32-2.xz. tests/files/README | 7 +++++++ tests/files/bad-1-check-crc32-2.xz | Bin 0 -> 72 bytes 2 files changed, 7 insertions(+) commit db465419ae26ec7fb9b9472183911ff521620c77 Author: Lasse Collin Date: 2021-01-17 19:20:50 +0200 liblzma: In EROFS LZMA decoder, verify that comp_size matches at the end. When the uncompressed size is known to be exact, after decompressing the stream exactly comp_size bytes of input must have been consumed. This is a minor improvement to error detection. src/liblzma/common/erofs_decoder.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit 774cc0118ba2496581cb2621505a04bb6598cc75 Author: Lasse Collin Date: 2021-01-17 18:53:34 +0200 liblzma: Make EROFS LZMA decoder work when exact uncomp_size isn't known. The caller must still not specify an uncompressed size bigger than the actual uncompressed size. As a downside, this now needs the exact compressed size. src/liblzma/api/lzma/container.h | 23 ++++++++--- src/liblzma/common/erofs_decoder.c | 80 ++++++++++++++++++++++++++++++++++---- 2 files changed, 91 insertions(+), 12 deletions(-) commit 421b0aa352da244075db10205cf33712f91b9835 Author: Lasse Collin Date: 2021-01-14 20:57:11 +0200 liblzma: Fix missing normalization in rc_encode_dummy(). Without this fix it could attempt to create too much output. src/liblzma/rangecoder/range_encoder.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit 601ec0311e769fc704daaaa7dac0ca840aff080e Author: Lasse Collin Date: 2021-01-14 20:07:01 +0200 liblzma: Add EROFS LZMA encoder and decoder. Right now this is just a planned extra-compact format for use in the EROFS file system in Linux. At this point it's possible that the format will either change or be abandoned and removed completely. 
The special thing about the encoder is that it uses the output-size-limited encoding added in the previous commit. EROFS uses fixed-sized blocks (e.g. 4 KiB) to hold compressed data so the compressors must be able to create valid streams that fill the given block size. src/liblzma/api/lzma/container.h | 76 +++++++++++++++++++ src/liblzma/common/Makefile.inc | 2 + src/liblzma/common/erofs_decoder.c | 148 +++++++++++++++++++++++++++++++++++++ src/liblzma/common/erofs_encoder.c | 139 ++++++++++++++++++++++++++++++++++ src/liblzma/liblzma.map | 2 + 5 files changed, 367 insertions(+) commit 625f4c7c99b2fcc4db9e7ab2deb4884790e2e17c Author: Lasse Collin Date: 2021-01-13 19:16:32 +0200 liblzma: Add rough support for output-size-limited encoding in LZMA1. With this it is possible to encode LZMA1 data without EOPM so that the encoder will encode as much input as it can without exceeding the specified output size limit. The resulting LZMA1 stream will be a normal LZMA1 stream without EOPM. The actual uncompressed size will be available to the caller via the uncomp_size pointer. One missing thing is that the LZMA layer doesn't inform the LZ layer when the encoding is finished and thus the LZ may read more input when it won't be used. However, this doesn't matter if encoding is done with a single call (which is the planned use case for now). For proper multi-call encoding this should be improved. This commit only adds the functionality for internal use. Nothing uses it yet. src/liblzma/common/common.h | 11 +++ src/liblzma/lz/lz_encoder.c | 16 ++++ src/liblzma/lz/lz_encoder.h | 4 + src/liblzma/lzma/lzma_encoder.c | 127 +++++++++++++++++++++++--------- src/liblzma/lzma/lzma_encoder_private.h | 12 +++ src/liblzma/rangecoder/range_encoder.h | 111 ++++++++++++++++++++++++++++ 6 files changed, 246 insertions(+), 35 deletions(-) commit 9cdabbeea891e8f1e7741b076f7db6ac05ae392a Author: Lasse Collin Date: 2021-01-11 23:57:11 +0200 Scripts: Add zstd support to xzdiff. 
src/scripts/xzdiff.1 | 6 ++++-- src/scripts/xzdiff.in | 16 +++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) commit d9ec3add97cf4c999a7f594c6529680227b6c274 Author: Lasse Collin Date: 2021-01-11 23:41:30 +0200 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit 074259f4f3966aeac6edb205fecbc1a8d2b58bb2 Author: Lasse Collin Date: 2021-01-11 23:41:16 +0200 xz: Make --keep accept symlinks, hardlinks, and setuid/setgid/sticky. Previously this required using --force but that has other effects too which might be undesirable. Changing the behavior of --keep has a small risk of breaking existing scripts but since this is a fairly special corner case I expect the likelihood of breakage to be low enough. I think the new behavior is more logical. The only reason for the old behavior was to be consistent with gzip and bzip2. Thanks to Vincent Lefevre and Sebastian Andrzej Siewior. src/xz/file_io.c | 9 +++++---- src/xz/xz.1 | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) commit 73c555b3077c19dda29b6f4592ced2af876f8333 Author: Lasse Collin Date: 2021-01-11 23:28:52 +0200 Scripts: Fix exit status of xzgrep. Omit the -q option from xz, gzip, and bzip2. With xz this shouldn't matter. With gzip it's important because -q makes gzip replace SIGPIPE with exit status 2. With bzip2 it's important because with -q bzip2 is completely silent if input is corrupt while other decompressors still give an error message. Avoiding exit status 2 from gzip is important because bzip2 uses exit status 2 to indicate corrupt input. Before this commit xzgrep didn't recognize corrupt .bz2 files because xzgrep was treating exit status 2 as SIGPIPE for gzip compatibility. zstd still needs -q because otherwise it is noisy in normal operation. The code to detect real SIGPIPE didn't check if the exit status was due to a signal (>= 128) and so could ignore some other exit status too.
src/scripts/xzgrep.in | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) commit 194029ffaf74282a81f0c299c07f73caca3232ca Author: Lasse Collin Date: 2021-01-11 22:01:51 +0200 Scripts: Fix exit status of xzdiff/xzcmp. This is a minor fix since this affects only the situation when the files differ and the exit status is something else than 0. In such case there could be SIGPIPE from a decompression tool and that would result in exit status of 2 from xzdiff/xzcmp while the correct behavior would be to return 1 or whatever else diff or cmp may have returned. This commit omits the -q option from xz/gzip/bzip2/lzop arguments. I'm not sure why the -q was used in the first place, perhaps it hides warnings in some situation that I cannot see at the moment. Hopefully the removal won't introduce a new bug. With gzip the -q option was harmful because it made gzip return 2 instead of >= 128 with SIGPIPE. Ignoring exit status 2 (warning from gzip) isn't practical because bzip2 uses exit status 2 to indicate corrupt input file. It's better if SIGPIPE results in exit status >= 128. With bzip2 the removal of -q seems to be good because with -q it prints nothing if input is corrupt. The other tools aren't silent in this situation even with -q. On the other hand, if zstd support is added, it will need -q since otherwise it's noisy in normal situations. Thanks to Étienne Mollier and Sebastian Andrzej Siewior. src/scripts/xzdiff.in | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) commit f7fa309e1f7178d04c7bedc03b73077639371e97 Author: Lasse Collin Date: 2021-01-09 21:14:36 +0200 liblzma: Make lzma_outq usable for threaded decompression too. Before this commit all output queue buffers were allocated as a single big allocation. Now each buffer is allocated separately when needed. Used buffers are cached to avoid reallocation overhead but the cache will keep only one buffer size at a time. 
This should make things work OK in the decompression where most of the time the buffer sizes will be the same but with some less common files the buffer sizes may vary. While this should work fine, it's still a bit preliminary and may even get reverted if it turns out to be useless for decompression. src/liblzma/common/outqueue.c | 268 +++++++++++++++++++++------------ src/liblzma/common/outqueue.h | 138 ++++++++++++----- src/liblzma/common/stream_encoder_mt.c | 52 ++++--- 3 files changed, 301 insertions(+), 157 deletions(-) commit a35a69d693ce37d4ba7c1855bda7d9cfa13d1778 Author: Lasse Collin Date: 2020-12-23 17:15:49 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4fd79b90c52396d70e0b1206ceb1a873a0ad2589 Author: H.J. Lu Date: 2020-12-23 06:49:04 -0800 liblzma: Enable Intel CET in x86 CRC assembly codes When Intel CET is enabled, we need to include <cet.h> in assembly codes to mark Intel CET support and add _CET_ENDBR to indirect jump targets. Tested on Intel Tiger Lake under CET enabled Linux. src/liblzma/check/crc32_x86.S | 9 +++++++++ src/liblzma/check/crc64_x86.S | 9 +++++++++ 2 files changed, 18 insertions(+) commit bb3b8c6a23e25db79f862b1de325c56052e0354b Author: Lasse Collin Date: 2020-12-16 18:33:29 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 21588ca34af98738954fc12ded1b89d7294ef646 Author: Lasse Collin Date: 2020-12-16 18:30:14 +0200 Build: Don't build bundles on Apple OSes. Thanks to Daniel Packard. CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) commit d05b0c42dd8b38d8c6b8193c8af50e9bd3d16f28 Author: Lasse Collin Date: 2020-12-05 22:44:03 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 1890351f3423627ba5c4c495402f32d7e9ed90b7 Author: Adam Borowski Date: 2020-09-25 03:35:18 +0200 Scripts: Add zstd support to xzgrep. Thanks to Adam Borowski.
src/scripts/xzgrep.1 | 9 ++++++--- src/scripts/xzgrep.in | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) commit 2f108abb3d82e4e2313b438dae9c0c7c7a6366f2 Author: Lasse Collin Date: 2020-11-17 20:51:48 +0200 CMake: Fix compatibility with CMake 3.13. The syntax "if(DEFINED CACHE{FOO})" requires CMake 3.14. In some other places the code treats the cache variables like normal variables already (${FOO} or if(FOO) is used, not ${CACHE{FOO}). Thanks to ygrek for reporting the bug on IRC. CMakeLists.txt | 2 +- cmake/tuklib_cpucores.cmake | 4 ++-- cmake/tuklib_physmem.cmake | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) commit 5af726a79273fafa5de5745b117e567f21c90e49 Author: Lasse Collin Date: 2020-11-01 22:56:43 +0200 Update THANKS. THANKS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit 4575d9d365c756ec189899f9f743e0b3515ce72d Author: Lasse Collin Date: 2020-11-01 22:34:25 +0200 xz: Avoid unneeded \f escapes on the man page. I don't want to use \c in macro arguments but groff_man(7) suggests that \f has better portability. \f would be needed for the .TP strings for portability reasons anyway. Thanks to Bjarni Ingi Gislason. src/xz/xz.1 | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) commit 620b32f5339f86710cb4435e01ecdac972ccac73 Author: Lasse Collin Date: 2020-11-01 19:09:53 +0200 xz: Use non-breaking spaces when intentionally using more than one space. This silences some style checker warnings. Seems that spaces in the beginning of a line don't need this treatment. Thanks to Bjarni Ingi Gislason. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit cb1f34988c8a4130485091b2f8b641303d8f701b Author: Lasse Collin Date: 2020-11-01 18:49:37 +0200 xz: Protect the ellipsis (...) on the man page with \&. This does it only when ... appears outside macro calls. Thanks to Bjarni Ingi Gislason. 
src/xz/xz.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 5d224da3da87400f2fab313abbd7c710e7169ef9 Author: Lasse Collin Date: 2020-11-01 18:41:21 +0200 xz: Avoid the abbreviation "e.g." on the man page. A few are simply omitted, most are converted to "for example" and surrounded with commas. Sounds like that this is better style, for example, man-pages(7) recommends avoiding such abbreviations except in parenthesis. Thanks to Bjarni Ingi Gislason. src/xz/xz.1 | 66 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) commit 90457dbe3e5717660f5b81f8c604860fc5137c0c Author: Lasse Collin Date: 2020-07-12 23:10:03 +0300 xz man page: Change \- (minus) to \(en (en-dash) for a numeric range. Docs of ancient troff/nroff mention \(em (em-dash) but not \(en and \- was used for both minus and en-dash. I don't know how portable \(en is nowadays but it can be changed back if someone complains. At least GNU groff and OpenBSD's mandoc support it. Thanks to Bjarni Ingi Gislason for the patch. src/xz/xz.1 | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) commit 352ba2d69af2136bc814aa1df1a132559d445616 Author: Lasse Collin Date: 2020-07-12 20:46:24 +0300 Windows: Fix building of resource files when config.h isn't used. Now CMake + Visual Studio works for building liblzma.dll. Thanks to Markus Rickert. src/common/common_w32res.rc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit a9e2a87f1d61dcf684d809bf08c8ebea93f8a480 Author: Lasse Collin Date: 2020-04-06 19:31:50 +0300 src/scripts/xzgrep.1: Filenames to xzgrep are optional. xzgrep --help was correct already. 
src/scripts/xzgrep.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a7ba275d9b855d186abb29eb7a4f4cb6d9ca6fe0 Author: Bjarni Ingi Gislason Date: 2020-03-26 22:17:31 +0000 src/script/xzgrep.1: Remove superfluous '.RB' Output is from: test-groff -b -e -mandoc -T utf8 -rF0 -t -w w -z [ "test-groff" is a developmental version of "groff" ] Input file is ./src/scripts/xzgrep.1 :20 (macro RB): only 1 argument, but more are expected :23 (macro RB): only 1 argument, but more are expected :26 (macro RB): only 1 argument, but more are expected :29 (macro RB): only 1 argument, but more are expected :32 (macro RB): only 1 argument, but more are expected "abc..." does not mean the same as "abc ...". The output from nroff and troff is unchanged except for the space between "file" and "...". Signed-off-by: Bjarni Ingi Gislason src/scripts/xzgrep.1 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit 133d498db0f4b14f066d192d64dbcade45deae6b Author: Bjarni Ingi Gislason Date: 2020-03-30 21:56:36 +0000 xzgrep.1: Delete superfluous '.PP' Summary: mandoc -T lint xzgrep.1 : mandoc: xzgrep.1:79:2: WARNING: skipping paragraph macro: PP empty There is no change in the output of "nroff" and "troff". Signed-off-by: Bjarni Ingi Gislason src/scripts/xzgrep.1 | 1 - 1 file changed, 1 deletion(-) commit 057839ca982f886387b66746bffe749cb14fd8cd Author: Bjarni Ingi Gislason Date: 2020-03-26 21:16:18 +0000 src/xz/xz.1: Correct misused two-fonts macros Output is from: test-groff -b -e -mandoc -T utf8 -rF0 -t -w w -z [ "test-groff" is a developmental version of "groff" ] Input file is ./src/xz/xz.1 :408 (macro BR): only 1 argument, but more are expected :1009 (macro BR): only 1 argument, but more are expected :1743 (macro BR): only 1 argument, but more are expected :1920 (macro BR): only 1 argument, but more are expected :2213 (macro BR): only 1 argument, but more are expected Output from nroff and troff is unchanged, except for a font change of a full stop (.). 
Signed-off-by: Bjarni Ingi Gislason src/xz/xz.1 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit b8e12f5ab4c9fd3cb09a4330b2861f6b979ababd Author: Lasse Collin Date: 2020-03-23 18:07:50 +0200 Typo fixes from fossies.org. https://fossies.org/linux/misc/xz-5.2.5.tar.xz/codespell.html Makefile.am | 2 +- doc/examples/01_compress_easy.c | 2 +- src/liblzma/api/lzma/base.h | 2 +- src/liblzma/check/crc32_x86.S | 2 +- src/liblzma/common/index.c | 2 +- src/xz/xz.1 | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) commit 869b9d1b4edd6df07f819d360d306251f8147353 Author: Lasse Collin Date: 2020-03-17 16:24:28 +0200 Update NEWS for 5.2.5. NEWS | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) commit a048e3a92d238c65f050a765174d9c75417231d4 Author: Lasse Collin Date: 2020-03-16 20:01:37 +0200 README: Update outdated sections. README | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) commit 29aed815ad4f98f3e4d355faa76a244ecd8ce716 Author: Lasse Collin Date: 2020-03-16 19:39:45 +0200 README: Mention that man pages can be translated. README | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 7fa7653940cc9dcfcbce2fbc5166ea343ad4e3c1 Author: Lasse Collin Date: 2020-03-16 16:43:29 +0200 Update INSTALL.generic from Automake 1.16.1. INSTALL.generic | 321 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 162 insertions(+), 159 deletions(-) commit 9bd317ef03ab9b3e6a927c27c2e9c4ac041182f0 Author: Lasse Collin Date: 2020-03-15 15:27:22 +0200 Update INSTALL for Windows and DOS and add preliminary info for z/OS. INSTALL | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) commit a3148c0446dc7fa96363752df414d22539c9007b Author: Lasse Collin Date: 2020-03-15 15:26:20 +0200 Build: Update m4/ax_pthread.m4 from Autoconf Archive (again). 
m4/ax_pthread.m4 | 219 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 117 insertions(+), 102 deletions(-) commit 7812002dd3ed319e42a14662a8531802cca8ca67 Author: Lasse Collin Date: 2020-03-11 21:15:35 +0200 xz: Never use thousand separators in DJGPP builds. DJGPP 2.05 added support for thousands separators but it's broken at least under WinXP with Finnish locale that uses a non-breaking space as the thousands separator. Workaround by disabling thousands separators for DJGPP builds. src/xz/util.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) commit 7c8f688bf7fccd65d396e0130cbf4ea5dff5c56f Author: Lasse Collin Date: 2020-03-11 19:38:08 +0200 DOS: Update dos/Makefile for DJGPP 2.05. It doesn't need -fgnu89-inline like 2.04beta did. dos/Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit 319ca928d73de87940c54e30bffe69f9fa65efdf Author: Lasse Collin Date: 2020-03-11 19:36:07 +0200 DOS: Update instructions in dos/INSTALL.txt. dos/INSTALL.txt | 59 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) commit cb6b227ce39932824812ccd8a0647bd968de27d2 Author: Lasse Collin Date: 2020-03-11 17:58:51 +0200 DOS: Update config.h. The added defines assume GCC >= 4.8. dos/config.h | 8 ++++++++ 1 file changed, 8 insertions(+) commit 4572d53e16e87eee375bc5624de2fd59bb0ae9cd Author: Lasse Collin Date: 2020-03-02 13:54:33 +0200 liblzma: Fix a comment and RC_SYMBOLS_MAX. The comment didn't match the value of RC_SYMBOLS_MAX and the value itself was slightly larger than actually needed. The only harm about this was that memory usage was a few bytes larger. src/liblzma/rangecoder/range_encoder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 265daa873c0d871f5f23f9b56e133a6f20045a0a Author: Lasse Collin Date: 2020-02-27 20:58:52 +0200 Build: Make CMake build fail if tuklib_cpucores or tuklib_physmem fails. 
CMakeLists.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 7c8b904527cdbe61248c80edcc2e20d840c4fef9 Author: Lasse Collin Date: 2020-02-27 20:24:27 +0200 Build: Add support for --no-po4a option to autogen.sh. Normally, if po4a isn't available, autogen.sh will return with non-zero exit status. The option --no-po4a can be useful when one knows that po4a isn't available but wants autogen.sh to still return with zero exit status. autogen.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) commit 292a5c0f9c9b3a66f5a5c652dc46381836d4537f Author: Lasse Collin Date: 2020-02-25 21:35:14 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 474320e9908786ba2021035f9013191e16cde08a Author: Lasse Collin Date: 2020-02-25 20:42:31 +0200 Build: Fix bugs in the CMake files. Seems that the phrase "add more quotes" from sh/bash scripting applies to CMake as well. E.g. passing an unquoted list ${FOO} to a function that expects one argument results in only the first element of the list being passed as an argument and the rest get ignored. Adding quotes helps ("${FOO}"). list(INSERT ...) is weird. Inserting an empty string to an empty variable results in empty list, but inserting it to a non-empty variable does insert an empty element to the list. Since INSERT requires at least one element, "${CMAKE_THREAD_LIBS_INIT}" needs to be quoted in CMakeLists.txt. It might result in an empty element in the list. It seems to not matter as empty elements consistently get ignored in that variable. In fact, calling cmake_check_push_state() and cmake_check_pop_state() will strip the empty elements from CMAKE_REQUIRED_LIBRARIES! In addition to quoting fixes, this fixes checks for the cache variables in tuklib_cpucores.cmake and tuklib_physmem.cmake. Thanks to Martin Matuška for testing and reporting the problems. These fixes aren't tested yet but hopefully they soon will be. 
CMakeLists.txt | 52 ++++++++++++++++++++++----------------------- cmake/tuklib_common.cmake | 8 ++++--- cmake/tuklib_cpucores.cmake | 30 ++++++++++++++------------ cmake/tuklib_integer.cmake | 34 +++++++++++++++-------------- cmake/tuklib_mbstr.cmake | 6 +++--- cmake/tuklib_physmem.cmake | 29 +++++++++++++------------ cmake/tuklib_progname.cmake | 4 ++-- 7 files changed, 85 insertions(+), 78 deletions(-) commit 7e3493d40eac0c3fa3d5124097745a70e15c41f6 Author: Lasse Collin Date: 2020-02-24 23:38:16 +0200 Build: Add very limited experimental CMake support. This does *NOT* replace the Autotools-based build system in the foreseeable future. See the comment in the beginning of CMakeLists.txt. So far this has been tested only on GNU/Linux but I commit it anyway to make it easier for others to test. Since I haven't played much with CMake before, it's likely that there are things that have been done in a silly or wrong way and need to be fixed. CMakeLists.txt | 643 ++++++++++++++++++++++++++++++++++++++++++++ cmake/tuklib_common.cmake | 47 ++++ cmake/tuklib_cpucores.cmake | 173 ++++++++++++ cmake/tuklib_integer.cmake | 100 +++++++ cmake/tuklib_mbstr.cmake | 20 ++ cmake/tuklib_physmem.cmake | 149 ++++++++++ cmake/tuklib_progname.cmake | 19 ++ 7 files changed, 1151 insertions(+) commit 21bd4701fca3e9002ce78bc135debca369ed8545 Author: Lasse Collin Date: 2020-02-24 23:37:07 +0200 Update m4/.gitignore. m4/.gitignore | 1 + 1 file changed, 1 insertion(+) commit e094d1d0f196a91ec703e8d0055948feef349ae8 Author: Lasse Collin Date: 2020-02-24 23:29:35 +0200 tuklib: Omit an unneeded from a tests. tuklib_cpucores.c and tuklib_physmem.c don't include even via other files in this package, so clearly that header isn't needed in the tests either (no one has reported build problems due to a missing header in a .c file). 
m4/tuklib_cpucores.m4 | 1 - m4/tuklib_physmem.m4 | 1 - 2 files changed, 2 deletions(-) commit b3ed19a55fe99a45bd77614e149d39d18498075c Author: Lasse Collin Date: 2020-02-24 23:01:00 +0200 liblzma: Remove unneeded <sys/types.h> from fastpos_tablegen.c. This file only generates fastpos_table.c. It isn't built as a part of liblzma. src/liblzma/lzma/fastpos_tablegen.c | 1 - 1 file changed, 1 deletion(-) commit 7b8982b29179b3c586e0456dc9ecbd4f58dcea59 Author: Lasse Collin Date: 2020-02-22 14:15:07 +0200 Use defined(__GNUC__) before __GNUC__ in preprocessor lines. This should silence the equivalent of -Wundef in compilers that don't define __GNUC__. src/common/sysdefs.h | 3 ++- src/liblzma/api/lzma.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) commit 43dfe04e6209c691cf4fbe3072d4ee91271748f1 Author: Lasse Collin Date: 2020-02-21 17:40:02 +0200 liblzma: Add more uses of lzma_memcmplen() to the normal mode of LZMA. This gives a tiny encoder speed improvement. This could have been done in 2014 after the commit 544aaa3d13554e8640f9caf7db717a96360ec0f6 but it was forgotten. src/liblzma/lzma/lzma_encoder_optimum_normal.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) commit 59e6eb4840b9f52fa3a61544974017279b448216 Author: Lasse Collin Date: 2020-02-21 17:01:15 +0200 Build: Add visibility.m4 from gnulib. Appears that this file used to get included as a side effect of gettext. After the change to gettext version requirements this file no longer got copied to the package and so the build was broken. m4/.gitignore | 1 - m4/visibility.m4 | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) commit 7fe3ef2eaa53d439cec043727ea1998f4ff0e22a Author: Lasse Collin Date: 2020-02-21 16:10:44 +0200 xz: Silence a warning when sig_atomic_t is long int. It can be true at least on z/OS.
src/xz/signals.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit b0a2a77d10940c42b449d47a005bfc2e50ab5db8 Author: Lasse Collin Date: 2020-02-21 15:59:26 +0200 xz: Avoid unneeded access of a volatile variable. src/xz/signals.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 524c2f12c762032b819757aeda8af7c47c4cabce Author: Lasse Collin Date: 2020-02-21 01:24:18 +0200 tuklib_integer.m4: Optimize the check order. The __builtin byteswapping is the preferred one so check for it first. m4/tuklib_integer.m4 | 56 +++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 27 deletions(-) commit 57360bb4fd79b358b36d2877db26ac828d1fdfcb Author: Lasse Collin Date: 2020-02-20 18:54:04 +0200 tuklib_exit: Add missing header. strerror() needs <string.h> which happened to be included via tuklib_common.h -> tuklib_config.h -> sysdefs.h if HAVE_CONFIG_H was defined. This wasn't tested without config.h before so it had worked fine. src/common/tuklib_exit.c | 1 + 1 file changed, 1 insertion(+) commit fddd31175e74a538997a939d930462fde17d2dd4 Author: Lasse Collin Date: 2020-02-18 19:12:35 +0200 Revert the previous commit and add a comment. The previous commit broke crc32_tablegen.c. If the whole package is built without config.h (with defines set on the compiler command line) this should still work fine as long as these headers conform to C99 well enough. src/common/tuklib_config.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) commit 4e4e9fbb7e66d45319525ac224bff48fbdd0cf6e Author: Lasse Collin Date: 2020-02-17 23:37:20 +0200 Do not check for HAVE_CONFIG_H in tuklib_config.h. In XZ Utils sysdefs.h takes care of it and the required headers. src/common/tuklib_config.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) commit 2d4cef954feba82073951358466a1d614141cf33 Author: Lasse Collin Date: 2020-02-16 11:18:28 +0200 sysdefs.h: Omit the conditionals around string.h and limits.h.
string.h is used unconditionally elsewhere in the project and configure has always stopped if limits.h is missing, so these headers must have been always available even on the weirdest systems. src/common/sysdefs.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) commit feb9c1969bc3eb33d4ecb72cfa897f92dae84939 Author: Lasse Collin Date: 2020-02-15 15:07:11 +0200 Build: Bump Autoconf and Libtool version requirements. There is no specific reason for this other than blocking the most ancient versions. These are still old: Autoconf 2.69 (2012) Automake 1.12 (2012) gettext 0.19.6 (2015) Libtool 2.4 (2010) configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 3d576cf92158d62790017ad7f2dd6dc1dd6b42bb Author: Lasse Collin Date: 2020-02-15 03:08:32 +0200 Build: Use AM_GNU_GETTEXT_REQUIRE_VERSION and require 0.19.6. This bumps the version requirement from 0.19 (from 2014) to 0.19.6 (2015). Using only the old AM_GNU_GETTEXT_VERSION results in old gettext infrastructure being placed in the package. By using both macros we get the latest gettext files while the other programs in the Autotools family can still see the old macro. configure.ac | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit fa792b8befaf7cb3960b655e0a9410da866d756f Author: Lasse Collin Date: 2020-02-14 20:42:06 +0200 Translations: Add German translation of the man pages. Thanks to Mario Blättermann. po4a/de.po | 5532 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po4a/po4a.conf | 2 +- 2 files changed, 5533 insertions(+), 1 deletion(-) commit 6f7211b6bb47a895b47f533282dba9ee9a1b0c8b Author: Lasse Collin Date: 2020-02-07 15:32:21 +0200 Build: Add support for translated man pages using po4a. The dependency on po4a is optional. It's never required to install the translated man pages when xz is built from a release tarball. If po4a is missing when building from xz.git, the translated man pages won't be generated but otherwise the build will work normally. 
The translations are only updated automatically by autogen.sh and by "make mydist". This makes it easy to keep po4a as an optional dependency and ensures that I won't forget to put updated translations to a release tarball. The translated man pages aren't installed if --disable-nls is used. The installation of translated man pages abuses Automake internals by calling "install-man" with redefined dist_man_MANS and man_MANS. This makes the hairy script code slightly less hairy. If it breaks some day, this code needs to be fixed; don't blame Automake developers. Also, this adds more quotes to the existing shell script code in the Makefile.am "-hook"s. Makefile.am | 4 ++++ autogen.sh | 8 ++++--- po4a/.gitignore | 2 ++ po4a/po4a.conf | 14 +++++++++++ po4a/update-po | 45 ++++++++++++++++++++++++++++++++++ src/scripts/Makefile.am | 64 +++++++++++++++++++++++++++++++++++++------------ src/xz/Makefile.am | 50 +++++++++++++++++++++++++++----------- src/xzdec/Makefile.am | 55 ++++++++++++++++++++++++++++++++---------- 8 files changed, 197 insertions(+), 45 deletions(-) commit 426f9e5819ff7710a5ff573a96c02940be65d52f Author: Lasse Collin Date: 2020-02-06 17:31:38 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit e3a4481d020e4de89efa037f335cf50f3ca55592 Author: Lasse Collin Date: 2020-02-05 22:35:06 +0200 Update tests/.gitignore. .gitignore | 4 ++++ 1 file changed, 4 insertions(+) commit 07208de92f2d5bca764f6d0ebe9d9866051dc4ef Author: Lasse Collin Date: 2020-02-05 22:28:51 +0200 Update m4/.gitignore. m4/.gitignore | 1 + 1 file changed, 1 insertion(+) commit c91fbf223db46c3b3cb9df769863a1a60cd9c908 Author: Lasse Collin Date: 2020-02-05 20:47:38 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 15a133b6d1a3eab4faf6eb52a71fdc56bd65846f Author: Lasse Collin Date: 2020-02-05 20:40:14 +0200 xz: Make it a fatal error if enabling the sandbox fails. 
Perhaps it's too drastic but on the other hand it will let me learn about possible problems if people report the errors. This won't be backported to the v5.2 branch. src/xz/file_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit af0fb386ef55db66654ae39e2deec6e04190c4ff Author: Lasse Collin Date: 2020-02-05 20:33:50 +0200 xz: Comment out annoying sandboxing messages. src/xz/file_io.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) commit 986d8c9b52b824474088e5bb3b6940651660f0e2 Author: Lasse Collin Date: 2020-02-05 19:33:37 +0200 Build: Workaround a POSIX shell detection problem on Solaris. I don't know if the problem is in gnulib's gl_POSIX_SHELL macro or if xzgrep does something that isn't in POSIX. The workaround adds a special case for Solaris: if /usr/xpg4/bin/sh exists and gl_cv_posix_shell wasn't overridden on the configure command line, use that shell for xzgrep and other scripts. That shell is known to work and exists on most Solaris systems. configure.ac | 10 ++++++++++ 1 file changed, 10 insertions(+) commit 6629ed929cc7d45a11e385f357ab58ec15e7e4ad Author: Lasse Collin Date: 2020-02-03 22:03:50 +0200 Build: Update m4/ax_pthread.m4 from Autoconf Archive. m4/ax_pthread.m4 | 398 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 279 insertions(+), 119 deletions(-) commit 353970510895f6a80adfe60cf71b70a95adfa8bc Author: Lasse Collin Date: 2020-02-01 19:56:18 +0200 xz: Limit --memlimit-compress to at most 4020 MiB for 32-bit xz. See the code comment for reasoning. It's far from perfect but hopefully good enough for certain cases while hopefully doing nothing bad in other situations. At presets -5 ... -9, 4020 MiB vs. 4096 MiB makes no difference on how xz scales down the number of threads. The limit has to be a few MiB below 4096 MiB because otherwise things like "xz --lzma2=dict=500MiB" won't scale down the dict size enough and xz cannot allocate enough memory.
With "ulimit -v $((4096 * 1024))" on x86-64, the limit in xz had to be no more than 4085 MiB. Some safety margin is good though. This is a hack but it should be useful when running 32-bit xz on a 64-bit kernel that gives full 4 GiB address space to xz. Hopefully this is enough to solve this: https://bugzilla.redhat.com/show_bug.cgi?id=1196786 FreeBSD has a patch that limits the result in tuklib_physmem() to SIZE_MAX on 32-bit systems. While I think it's not the way to do it, the results on --memlimit-compress have been good. This commit should achieve practically identical results for compression while leaving decompression and tuklib_physmem() and thus lzma_physmem() unaffected. src/xz/hardware.c | 32 +++++++++++++++++++++++++++++++- src/xz/xz.1 | 21 ++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) commit ba76d67585f88677af9f48b48e7bdc3bb7687def Author: Lasse Collin Date: 2020-01-26 20:53:25 +0200 xz: Set the --flush-timeout deadline when the first input byte arrives. xz --flush-timeout=2000, old version: 1. xz is started. The next flush will happen after two seconds. 2. No input for one second. 3. A burst of a few kilobytes of input. 4. No input for one second. 5. Two seconds have passed and flushing starts. The first second counted towards the flush-timeout even though there was no pending data. This can cause flushing to occur more often than needed. xz --flush-timeout=2000, after this commit: 1. xz is started. 2. No input for one second. 3. A burst of a few kilobytes of input. The next flush will happen after two seconds counted from the time when the first bytes of the burst were read. 4. No input for one second. 5. No input for another second. 6. Two seconds have passed and flushing starts.
src/xz/coder.c | 6 +----- src/xz/file_io.c | 6 +++++- src/xz/mytime.c | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) commit fd47fd62bbb1bfd13ab63869137971d8b390025f Author: Lasse Collin Date: 2020-01-26 20:19:19 +0200 xz: Move flush_needed from mytime.h to file_pair struct in file_io.h. src/xz/coder.c | 3 ++- src/xz/file_io.c | 3 ++- src/xz/file_io.h | 3 +++ src/xz/mytime.c | 3 --- src/xz/mytime.h | 4 ---- 5 files changed, 7 insertions(+), 9 deletions(-) commit 815035681063d5774d3640fc20b8ede783dd574e Author: Lasse Collin Date: 2020-01-26 14:49:22 +0200 xz: coder.c: Make writing output a separate function. The same code sequence repeats so it's nicer as a separate function. Note that in one case there was no test for opt_mode != MODE_TEST, but that was only because that condition would always be true, so this commit doesn't change the behavior there. src/xz/coder.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) commit 5a49e081a098455bcdbd95cefb90e9b18780fe58 Author: Lasse Collin Date: 2020-01-26 14:13:42 +0200 xz: Fix semi-busy-waiting in xz --flush-timeout. When input blocked, xz --flush-timeout=1 would wake up every millisecond and initiate flushing which would have nothing to flush and thus would just waste CPU time. The fix disables the timeout when no input has been seen since the previous flush. src/xz/coder.c | 4 ++++ src/xz/file_io.c | 15 +++++++++++---- src/xz/file_io.h | 4 ++++ 3 files changed, 19 insertions(+), 4 deletions(-) commit dcca70fe9fa3c4bec56cf9c79e966166c9a9cf6a Author: Lasse Collin Date: 2020-01-26 13:47:31 +0200 xz: Refactor io_read() a bit. src/xz/file_io.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) commit 4ae9ab70cd3214395756435d13d8d000368ca2cb Author: Lasse Collin Date: 2020-01-26 13:37:08 +0200 xz: Update a comment in file_io.h. 
src/xz/file_io.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit 3333ba4a6795a55cf0375329ba08152bd7fcbd46 Author: Lasse Collin Date: 2020-01-26 13:27:51 +0200 xz: Move the setting of flush_needed in file_io.c to a nicer location. src/xz/file_io.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) commit cf2df0f05ac98c1158c6e48145900b773223605d Author: Lasse Collin Date: 2020-01-19 21:54:33 +0200 Use $(LIB_FUZZING_ENGINE) in tests/ossfuzz/Makefile. https://github.com/google/oss-fuzz/pull/3219#issuecomment-573751048 Thanks to Bhargava Shastry for sending the patch. tests/ossfuzz/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 7136f1735c60ac6967c4b8e277fcde53d485234f Author: Lasse Collin Date: 2019-12-31 00:41:28 +0200 Rename unaligned_read32ne to read32ne, and similarly for the others. src/common/tuklib_integer.h | 64 +++++++++++++++---------------- src/liblzma/common/alone_encoder.c | 2 +- src/liblzma/common/block_header_decoder.c | 2 +- src/liblzma/common/block_header_encoder.c | 2 +- src/liblzma/common/memcmplen.h | 9 ++--- src/liblzma/common/stream_flags_decoder.c | 6 +-- src/liblzma/common/stream_flags_encoder.c | 8 ++-- src/liblzma/lz/lz_encoder_hash.h | 2 +- src/liblzma/lzma/lzma_decoder.c | 2 +- src/liblzma/lzma/lzma_encoder.c | 2 +- src/liblzma/lzma/lzma_encoder_private.h | 3 +- src/liblzma/simple/simple_decoder.c | 2 +- src/liblzma/simple/simple_encoder.c | 2 +- tests/test_block_header.c | 4 +- tests/test_stream_flags.c | 6 +-- 15 files changed, 54 insertions(+), 62 deletions(-) commit 5e78fcbf2eb21936022c9c5c3625d4da76f4b241 Author: Lasse Collin Date: 2019-12-31 00:29:48 +0200 Rename read32ne to aligned_read32ne, and similarly for the others. Using the aligned methods requires more care to ensure that the address really is aligned, so it's nicer if the aligned methods are prefixed. 
The next commit will remove the unaligned_ prefix from the unaligned methods which in liblzma are used in more places than the aligned ones. src/common/tuklib_integer.h | 56 +++++++++++++++++++++--------------------- src/liblzma/check/crc32_fast.c | 4 +-- src/liblzma/check/crc64_fast.c | 4 +-- 3 files changed, 32 insertions(+), 32 deletions(-) commit 77bc5bc6dd67056cfd5888520ac930cfc57b4516 Author: Lasse Collin Date: 2019-12-31 00:18:24 +0200 Revise tuklib_integer.h and .m4. Add a configure option --enable-unsafe-type-punning to get the old non-conforming memory access methods. It can be useful with old compilers or in some other less typical situations but shouldn't normally be used. Omit the packed struct trick for unaligned access. While it's best in some cases, this is simpler. If the memcpy trick doesn't work, one can request unsafe type punning from configure. Because CRC32/CRC64 code needs fast aligned reads, if no very safe way to do it is found, type punning is used as a fallback. This sucks but since it currently works in practice, it seems to be the least bad option. It's never needed with GCC >= 4.7 or Clang >= 3.6 since these support __builtin_assume_aligned and thus fast aligned access can be done with the memcpy trick. Other things: - Support GCC/Clang __builtin_bswapXX - Cleaner bswap fallback macros - Minor cleanups m4/tuklib_integer.m4 | 46 ++++- src/common/tuklib_integer.h | 488 ++++++++++++++++++++++++-------------------- 2 files changed, 316 insertions(+), 218 deletions(-) commit 8b72950a6b2e2a36c2d8fdc8857564b57191b088 Author: Lasse Collin Date: 2019-12-29 22:51:58 +0200 Tests: Hopefully fix test_check.c to work on EBCDIC systems. Thanks to Daniel Richard G. tests/test_check.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) commit 43ce4ea7c762238d3df9717b34126d3e0d7cd51c Author: Lasse Collin Date: 2019-09-24 23:02:40 +0300 Scripts: Put /usr/xpg4/bin to the beginning of PATH on Solaris. 
This adds a configure option --enable-path-for-scripts=PREFIX which defaults to empty except on Solaris it is /usr/xpg4/bin to make POSIX grep and others available. The Solaris case had been documented in INSTALL with a manual fix but it's better to do this automatically since it is needed on most Solaris systems anyway. Thanks to Daniel Richard G. INSTALL | 43 +++++++++++++++++++++++++++++++++++-------- configure.ac | 26 ++++++++++++++++++++++++++ src/scripts/xzdiff.in | 1 + src/scripts/xzgrep.in | 1 + src/scripts/xzless.in | 1 + src/scripts/xzmore.in | 1 + 6 files changed, 65 insertions(+), 8 deletions(-) commit 6a89e656ebedd53a10cd1a063a32a9e4ade0da1f Author: Lasse Collin Date: 2019-07-12 18:57:43 +0300 Fix comment typos in tuklib_mbstr* files. src/common/tuklib_mbstr.h | 2 +- src/common/tuklib_mbstr_fw.c | 2 +- src/common/tuklib_mbstr_width.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) commit ac0b4212656a48ef0c187c0c941d40ac9489ae36 Author: Lasse Collin Date: 2019-07-12 18:30:46 +0300 Add missing include to tuklib_mbstr_width.c. It didn't matter in XZ Utils because sysdefs.h includes string.h anyway. src/common/tuklib_mbstr_width.c | 1 + 1 file changed, 1 insertion(+) commit 72a443281fb0b91aebf8cdff2ab1f7c07b081240 Author: Lasse Collin Date: 2019-07-12 18:10:57 +0300 Update tuklib base headers to include stdbool.h. src/common/tuklib_common.h | 2 +- src/common/tuklib_config.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) commit de1f47b2b40e960b7bc3acba754f66dd19705921 Author: Lasse Collin Date: 2019-06-28 00:54:31 +0300 xz: Automatically align the strings in --info-memory. This makes it easier to translate the strings. Also, the string for amount of RAM was shortened. src/xz/hardware.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) commit 8ce679125dbd0e2058d8f886e738d7f19a45cab5 Author: Lasse Collin Date: 2019-06-25 23:15:21 +0300 liblzma: Fix a buggy comment. 
src/liblzma/lz/lz_encoder_mf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit fc77929e92e869f6869bf88931066103fd75f376 Author: Lasse Collin Date: 2019-06-25 00:16:06 +0300 configure.ac: Fix a typo in a comment. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit e873902641794210ad7db59743f98e3e1cd6139f Author: Lasse Collin Date: 2019-06-25 00:08:13 +0300 Tests: Silence warnings from clang -Wassign-enum. Also changed 999 to 99 so it fits even if lzma_check happened to be 8 bits wide. tests/test_block_header.c | 3 ++- tests/test_stream_flags.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) commit d499e467d99efeaae688564eedc4548837c1416a Author: Lasse Collin Date: 2019-06-24 23:52:17 +0300 liblzma: Add a comment. src/liblzma/common/stream_encoder_mt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a12b13c5f0d54c684fa8446f93fdac08ab2a716b Author: Lasse Collin Date: 2019-06-24 23:45:21 +0300 liblzma: Silence clang -Wmissing-variable-declarations. src/liblzma/check/crc32_table.c | 3 +++ src/liblzma/check/crc64_table.c | 3 +++ 2 files changed, 6 insertions(+) commit 1b4675cebf7471f7cc9b7072c950e3de97147063 Author: Lasse Collin Date: 2019-06-24 23:25:41 +0300 Add LZMA_RET_INTERNAL1..8 to lzma_ret and use one for LZMA_TIMED_OUT. LZMA_TIMED_OUT is *internally* used as a value for lzma_ret enumeration. Previously it was #defined to 32 and cast to lzma_ret. That way it wasn't visible in the public API, but this was hackish. Now the public API has eight LZMA_RET_INTERNALx members and LZMA_TIMED_OUT is #defined to LZMA_RET_INTERNAL1. This way the code is cleaner overall although the public API has a few extra mysterious enum members. 
src/liblzma/api/lzma/base.h | 15 ++++++++++++++- src/liblzma/common/common.c | 4 +--- src/liblzma/common/common.h | 5 ++--- src/xz/message.c | 8 ++++++++ 4 files changed, 25 insertions(+), 7 deletions(-) commit 159c43875eb25deea626ed651274464bae3e32ef Author: Lasse Collin Date: 2019-06-24 22:57:43 +0300 xz: Silence a warning from clang -Wsign-conversion in main.c. src/xz/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 466cfcd3e52f6750ce28a635997f3dd84fb18515 Author: Lasse Collin Date: 2019-06-24 22:52:20 +0300 xz: Make "headings" static in list.c. Caught by clang -Wmissing-variable-declarations. src/xz/list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 608517b9b76c41fac6613dbda1193d6f41338e19 Author: Lasse Collin Date: 2019-06-24 22:47:39 +0300 liblzma: Remove incorrect uses of lzma_attribute((__unused__)). Caught by clang -Wused-but-marked-unused. src/liblzma/common/alone_decoder.c | 3 +-- src/liblzma/common/alone_encoder.c | 3 +-- src/liblzma/lz/lz_decoder.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) commit c2d2ab6a9d41a2b55d047c5b710aacf80d219255 Author: Lasse Collin Date: 2019-06-24 20:53:55 +0300 Tests: Silence a warning from -Wsign-conversion. tests/create_compress_files.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) commit 2402f7873dcae719d0ebddd23bb579074519ac52 Author: Lasse Collin Date: 2019-06-24 20:45:49 +0300 xz: Fix an integer overflow with 32-bit off_t. Or any off_t which isn't very big (like signed 64 bit integer that most system have). A small off_t could overflow if the file being decompressed had long enough run of zero bytes, which would result in corrupt output. src/xz/file_io.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) commit 4fd3a8dd0b60f029e1c66a0ee634f9e9fda3caa9 Author: Lasse Collin Date: 2019-06-24 01:24:17 +0300 xz: Cleanup io_seek_src() a bit. lseek() returns -1 on error and checking for -1 is nicer. 
src/xz/file_io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit dfda7cf6afa486e10df035327d68753896dfb48a Author: Lasse Collin Date: 2019-06-24 00:57:23 +0300 Tests: Remove a duplicate branch from tests/tests.h. The duplication was introduced about eleven years ago and should have been cleaned up back then already. This was caught by -Wduplicated-branches. tests/tests.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) commit 1d4a904d8fb634bd5a04f7fbdd17d3739f3d8866 Author: Lasse Collin Date: 2019-06-24 00:40:45 +0300 xz: Change io_seek_src and io_pread arguments from off_t to uint64_t. This helps fixing warnings from -Wsign-conversion and makes the code look better too. src/xz/file_io.c | 16 ++++++++++++---- src/xz/file_io.h | 4 ++-- src/xz/list.c | 9 ++++----- 3 files changed, 18 insertions(+), 11 deletions(-) commit 50120deb0159fcb53ee1a6caffb2bb81a1ecd990 Author: Lasse Collin Date: 2019-06-24 00:12:38 +0300 xz: list.c: Fix some warnings from -Wsign-conversion. src/xz/list.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit d0a78751eb54fb1572002746c533936a118e4e42 Author: Lasse Collin Date: 2019-06-23 23:22:45 +0300 tuklib_mbstr_width: Fix a warning from -Wsign-conversion. src/common/tuklib_mbstr_width.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 7883d73530b4b2a701ddd7d50c35676cbc158039 Author: Lasse Collin Date: 2019-06-23 23:19:34 +0300 xz: Fix some of the warnings from -Wsign-conversion. src/xz/args.c | 4 ++-- src/xz/coder.c | 4 ++-- src/xz/file_io.c | 5 +++-- src/xz/message.c | 4 ++-- src/xz/mytime.c | 4 ++-- src/xz/options.c | 2 +- src/xz/util.c | 4 ++-- 7 files changed, 14 insertions(+), 13 deletions(-) commit c2b994fe3d35e9e575c28869a2f7f534f2495d05 Author: Lasse Collin Date: 2019-06-23 22:27:45 +0300 tuklib_cpucores: Silence warnings from -Wsign-conversion. 
src/common/tuklib_cpucores.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit 07c4fa9e1a195e0543f271380c8de22a3ab145ff Author: Lasse Collin Date: 2019-06-23 21:40:47 +0300 xzdec: Fix warnings from -Wsign-conversion. src/xzdec/xzdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit dfac2c9a1d7d4a2b8a5d7c9c6d567dee48318bcf Author: Lasse Collin Date: 2019-06-23 21:38:56 +0300 liblzma: Fix warnings from -Wsign-conversion. Also, more parentheses were added to the literal_subcoder macro in lzma_common.h (better style but no functional change in the current usage). src/liblzma/common/block_header_decoder.c | 2 +- src/liblzma/delta/delta_decoder.c | 2 +- src/liblzma/lzma/fastpos.h | 2 +- src/liblzma/lzma/lzma2_decoder.c | 8 ++++---- src/liblzma/lzma/lzma_common.h | 3 ++- src/liblzma/lzma/lzma_decoder.c | 16 ++++++++-------- src/liblzma/simple/arm.c | 6 +++--- src/liblzma/simple/armthumb.c | 8 ++++---- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 9 +++++---- src/liblzma/simple/x86.c | 2 +- 11 files changed, 31 insertions(+), 29 deletions(-) commit 41838dcc26375f6aa393a63e4d81e2f4d223de07 Author: Lasse Collin Date: 2019-06-23 19:33:55 +0300 tuklib_integer: Silence warnings from -Wsign-conversion. src/common/tuklib_integer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 3ce05d235f736d392347a05086b8033416874b87 Author: Lasse Collin Date: 2019-06-20 19:40:30 +0300 tuklib_integer: Fix usage of conv macros. Use a temporary variable instead of e.g. conv32le(unaligned_read32ne(buf)) because the macro can evaluate its argument multiple times. src/common/tuklib_integer.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) commit b525b0c0ef40cd89b69294c9b8d57f4a8db58e1f Author: Lasse Collin Date: 2019-06-03 20:44:19 +0300 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit 039a168e8cf201d5104a25ec41f0cf25eda6cc53 Author: Lasse Collin Date: 2019-06-03 20:41:54 +0300 liblzma: Fix comments. Thanks to Bruce Stark. src/liblzma/common/alone_encoder.c | 4 ++-- src/liblzma/common/block_util.c | 2 +- src/liblzma/common/common.c | 2 +- src/liblzma/common/filter_common.h | 2 +- src/liblzma/common/filter_decoder.h | 2 +- src/liblzma/common/filter_flags_encoder.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) commit c460f6defebc5a81bbca90adc2476154ca244f69 Author: Lasse Collin Date: 2019-06-02 00:50:59 +0300 liblzma: Fix one more unaligned read to use unaligned_read16ne(). src/liblzma/lz/lz_encoder_hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c81d77c537f0b8c8672868e1dc6cf7290ce4a25b Author: Lasse Collin Date: 2019-06-01 21:41:55 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 386394fc9fcde2615391f804eaa466749f96f4ef Author: Lasse Collin Date: 2019-06-01 21:36:13 +0300 liblzma: memcmplen: Use ctz32() from tuklib_integer.h. The same compiler-specific #ifdefs are already in tuklib_integer.h src/liblzma/common/memcmplen.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) commit 264ab971ce2994baac41b1579c9c35aba7743fc8 Author: Lasse Collin Date: 2019-06-01 21:30:03 +0300 tuklib_integer: Cleanup MSVC-specific code. src/common/tuklib_integer.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) commit 33773c6f2a8711d4aa6656795db52c59a28580ec Author: Lasse Collin Date: 2019-06-01 19:01:21 +0300 liblzma: Use unaligned_readXXne functions instead of type punning. Now gcc -fsanitize=undefined should be clean. Thanks to Jeffrey Walton. 
src/liblzma/common/memcmplen.h | 12 ++++++------ src/liblzma/lzma/lzma_encoder_private.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) commit e5f13a66567b1987e0aae42c6fdcd277bb5810ba Author: Lasse Collin Date: 2019-06-01 18:46:54 +0300 tuklib_integer: Autodetect support for unaligned access on ARM. The result is used as the default for --enable-unaligned-access. The test should work with GCC and Clang. m4/tuklib_integer.m4 | 11 +++++++++++ 1 file changed, 11 insertions(+) commit 3bc112c2d38d5f348bce7bc2422286b1692c7490 Author: Lasse Collin Date: 2019-06-01 18:41:16 +0300 tuklib_integer: Improve unaligned memory access. Now memcpy() or GNU C packed structs are used for unaligned access instead of type punning. See the comment in this commit for details. Avoiding type punning with unaligned access is needed to silence gcc -fsanitize=undefined. New functions: unaligned_readXXne and unaligned_writeXXne where XX is 16, 32, or 64. src/common/tuklib_integer.h | 180 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 168 insertions(+), 12 deletions(-) commit 2a22de439ec63da1927b640eda309296a1e8dce5 Author: Lasse Collin Date: 2019-05-13 20:05:17 +0300 liblzma: Avoid memcpy(NULL, foo, 0) because it is undefined behavior. I should have always known this but I didn't. Here is an example as a reminder to myself: int mycopy(void *dest, void *src, size_t n) { memcpy(dest, src, n); return dest == NULL; } In the example, a compiler may assume that dest != NULL because passing NULL to memcpy() would be undefined behavior. Testing with GCC 8.2.1, mycopy(NULL, NULL, 0) returns 1 with -O0 and -O1. With -O2 the return value is 0 because the compiler infers that dest cannot be NULL because it was already used with memcpy() and thus the test for NULL gets optimized out. 
In liblzma, if a null-pointer was passed to memcpy(), there were no checks for NULL *after* the memcpy() call, so I cautiously suspect that it shouldn't have caused bad behavior in practice, but it's hard to be sure, and the problematic cases had to be fixed anyway. Thanks to Jeffrey Walton. src/liblzma/common/common.c | 6 +++++- src/liblzma/lz/lz_decoder.c | 12 +++++++++--- src/liblzma/simple/simple_coder.c | 10 +++++++++- 3 files changed, 23 insertions(+), 5 deletions(-) commit d3fc850cfedc058247d9e334ce59bbc8f2286d8a Author: Lasse Collin Date: 2019-05-11 20:56:08 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4adb8288ab61d5f14e212007b8742df0710baf73 Author: Lasse Collin Date: 2019-05-11 20:54:12 +0300 xz: Update xz man page date. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 2fb0ddaa557ce86e38fe06439930fa8665f092fd Author: Antoine Cœur Date: 2019-05-08 13:30:57 +0800 spelling Doxyfile.in | 2 +- NEWS | 2 +- doc/examples/11_file_info.c | 2 +- src/liblzma/api/lzma/block.h | 2 +- src/liblzma/api/lzma/hardware.h | 2 +- src/liblzma/api/lzma/lzma12.h | 2 +- src/liblzma/api/lzma/vli.h | 2 +- src/liblzma/common/file_info.c | 4 ++-- src/liblzma/common/hardware_physmem.c | 2 +- src/liblzma/common/index.c | 4 ++-- src/liblzma/common/stream_encoder_mt.c | 2 +- src/liblzma/common/vli_decoder.c | 2 +- src/liblzma/lz/lz_decoder.c | 2 +- src/scripts/xzgrep.in | 2 +- src/xz/args.c | 2 +- src/xz/coder.c | 4 ++-- src/xz/list.c | 4 ++-- src/xz/main.c | 2 +- src/xz/mytime.h | 2 +- src/xz/private.h | 2 +- src/xz/xz.1 | 2 +- windows/build.bash | 2 +- 22 files changed, 26 insertions(+), 26 deletions(-) commit 4ed339606156bd313ed99237485cb8ed0362d64f Author: Lasse Collin Date: 2019-05-01 18:43:10 +0300 xz: In xz -lvv look at the widths of the check names too. Now the widths of the check names is used to adjust the width of the Check column. 
This way there no longer is a need to restrict the widths of the check names to be at most ten terminal-columns. src/xz/list.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) commit 2f4281a1001dcf7fdf1418c0c0d246c16561fb65 Author: Lasse Collin Date: 2019-05-01 18:33:25 +0300 xz: Fix xz -lvv column alignment to look at the translated strings. src/xz/list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 01d01b7c7c0b8eaf7f780a5584ec52c22d10fa4a Author: Lasse Collin Date: 2019-05-01 16:52:36 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 64030c6b17f7743df03a9948a0ccfcdf40c6b97c Author: Lasse Collin Date: 2019-05-01 16:43:16 +0300 Windows: Update VS version in windows/vs2019/config.h. windows/vs2019/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 2dc9117f5fbfab31444a3ca1e55795ccfa8a9f51 Author: Julien Marrec Date: 2019-04-25 17:44:06 +0200 Windows: Upgrade solution itself windows/vs2019/xz_win.sln | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) commit ac31413916fa9b11bab17f0f0aa63e2869360f6f Author: Julien Marrec Date: 2019-04-25 17:40:24 +0200 Windows: Upgrade solution with VS2019 windows/vs2019/liblzma.vcxproj | 15 ++++++++------- windows/vs2019/liblzma_dll.vcxproj | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) commit be25a0c37ba92a20c390b4d17fe502457fe96b71 Author: Julien Marrec Date: 2019-04-25 17:39:32 +0200 Windows: Duplicate windows/vs2017 before upgrading windows/vs2019/config.h | 148 ++++++++++++++ windows/vs2019/liblzma.vcxproj | 356 ++++++++++++++++++++++++++++++++++ windows/vs2019/liblzma_dll.vcxproj | 385 +++++++++++++++++++++++++++++++++++++ windows/vs2019/xz_win.sln | 48 +++++ 4 files changed, 937 insertions(+) commit d0e58b3a51e8e616f3dc26ec7b7e4aa0fa6991ad Author: Lasse Collin Date: 2019-03-04 22:49:04 +0200 README: Update translation instructions. XZ Utils is now part of the Translation Project . 
README | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) commit a750c35a7d45a16c11c1d40fecee8443c32a9996 Author: Lasse Collin Date: 2019-03-04 21:20:39 +0200 xz: Automatically align column headings in xz -lvv. src/xz/list.c | 263 ++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 212 insertions(+), 51 deletions(-) commit 6cb42e8aa1dc37bf403a9f5acbd07e86036b7e77 Author: Lasse Collin Date: 2019-03-04 01:07:59 +0200 xz: Automatically align strings ending in a colon in --list output. This should avoid alignment errors in translations with these strings. src/xz/list.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 102 insertions(+), 12 deletions(-) commit 1e3f29b62f2c03e50fc9ebea7b83c1497dd35484 Author: Lasse Collin Date: 2019-01-13 17:29:23 +0200 Windows/VS2017: Omit WindowsTargetPlatformVersion from project files. I understood that if a WTPV is specified, it's often wrong because different VS installations have different SDK version installed. Omitting the WTPV tag makes VS2017 default to Windows SDK 8.1 which often is also missing, so in any case people may need to specify the WTPV before building. But some day in the future a missing WTPV tag will start to default to the latest installed SDK which sounds reasonable: https://developercommunity.visualstudio.com/content/problem/140294/windowstargetplatformversion-makes-it-impossible-t.html Thanks to "dom". windows/INSTALL-MSVC.txt | 4 ++++ windows/vs2017/liblzma.vcxproj | 1 - windows/vs2017/liblzma_dll.vcxproj | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) commit 4d86076332aece6314063d3712a5f364172bbb0f Author: Lasse Collin Date: 2018-12-20 20:42:29 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit b55d79461d1f6aeaac03c7dae84481e5eb8bea4c Author: Lasse Collin Date: 2018-12-14 20:34:30 +0200 xz: Fix a crash in progress indicator when in passthru mode. 
"xz -dcfv not_an_xz_file" crashed (all four options are required to trigger it). It caused xz to call lzma_get_progress(&strm, ...) when no coder was initialized in strm. In this situation strm.internal is NULL which leads to a crash in lzma_get_progress(). The bug was introduced when xz started using lzma_get_progress() to get progress info for multi-threaded compression, so the bug is present in versions 5.1.3alpha and higher. Thanks to Filip Palian for the bug report. src/xz/coder.c | 11 +++++++---- src/xz/message.c | 18 ++++++++++++++++-- src/xz/message.h | 3 ++- 3 files changed, 25 insertions(+), 7 deletions(-) commit 4ae5526de013efd1021686fa80bdd10cf1cb9c56 Author: Lasse Collin Date: 2018-11-22 17:20:31 +0200 xz: Update man page timestamp. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 6a36d0d5f49e0080ff64dd9ef250abd489bea2ad Author: Pavel Raiskup Date: 2018-11-22 15:14:34 +0100 'have have' typos src/xz/signals.c | 2 +- src/xz/xz.1 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 9eca51ef805ed8002a851df1b4995d71826c8b6e Author: Lasse Collin Date: 2018-11-02 20:40:48 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 106d1a663d4ba42b63231caa289f531548df81c1 Author: Lasse Collin Date: 2018-11-02 20:18:45 +0200 Tests: Add a fuzz test program and a config file for OSS-Fuzz. Thanks to Bhargava Shastry and Github user pdknsk. tests/Makefile.am | 1 + tests/ossfuzz/Makefile | 7 ++++ tests/ossfuzz/config/fuzz.dict | 2 + tests/ossfuzz/config/fuzz.options | 2 + tests/ossfuzz/fuzz.c | 82 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+) commit a18ae42a79a19b1394b41eb3e238139fd28012ec Author: Lasse Collin Date: 2018-10-26 22:49:10 +0300 liblzma: Don't verify header CRC32s if building for fuzz testing. FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is #defined when liblzma is being built for fuzz testing. 
Most fuzzed inputs would normally get rejected because of incorrect CRC32 and the actual header decoding code wouldn't get fuzzed. Disabling CRC32 checks avoids this problem. The fuzzer program must still use LZMA_IGNORE_CHECK flag to disable verification of integrity checks of uncompressed data. src/liblzma/common/block_header_decoder.c | 5 ++++- src/liblzma/common/index_decoder.c | 5 ++++- src/liblzma/common/index_hash.c | 5 ++++- src/liblzma/common/stream_flags_decoder.c | 10 ++++++++-- 4 files changed, 20 insertions(+), 5 deletions(-) commit f76f7516d6a1c832f61810c82e92d151cc80966c Author: Lasse Collin Date: 2018-07-27 18:10:44 +0300 xzless: Rename unused variables to silence static analysers. In this particular case I don't see this affecting readability of the code. Thanks to Pavel Raiskup. src/scripts/xzless.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 3cbcaeb07eb7543735befd6f507fdb5fa4363cff Author: Lasse Collin Date: 2018-07-27 16:02:58 +0300 liblzma: Remove an always-true condition from lzma_index_cat(). This should help static analysis tools to see that newg isn't leaked. Thanks to Pavel Raiskup. src/liblzma/common/index.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 76762ae6098ec55c326f4b4b4a42e8c1918ee81f Author: Lasse Collin Date: 2018-05-19 21:23:25 +0300 liblzma: Improve lzma_properties_decode() API documentation. src/liblzma/api/lzma/filter.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 2267f5b0d20a5d24e93fcd9f72ea7eeb0d89708c Author: Lasse Collin Date: 2018-04-29 18:58:19 +0300 Bump the version number to 5.3.1alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma.map | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit cee3021d30704858e4bdd22240e7d28e570d7451 Author: Lasse Collin Date: 2018-04-29 18:48:00 +0300 extra/scanlzma: Fix compiler warnings. 
extra/scanlzma/scanlzma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit c5c7ceb08a011b97d261798033e2c39613a69eb7 Author: Lasse Collin Date: 2018-04-29 18:44:47 +0300 DOS: Add file_info.c to the list of files to build. dos/Makefile | 1 + 1 file changed, 1 insertion(+) commit 114cab97af766b21e0fc8620479202fb1e7a5e41 Author: Lasse Collin Date: 2018-04-29 18:33:10 +0300 Update NEWS for 5.3.1alpha. NEWS | 11 +++++++++++ 1 file changed, 11 insertions(+) commit b8139e11c512bbf32bf58ab0689f9bb6c52819da Author: Lasse Collin Date: 2018-04-29 18:15:37 +0300 Add NEWS for 5.2.4. NEWS | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) commit 47b59d47cfd904a420fbd45629d168ca1973721d Author: Lasse Collin Date: 2018-02-06 19:36:30 +0200 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit bc197991690ede24ab143665b5b0f0f9cb35cc46 Author: Ben Boeckel Date: 2018-01-29 13:58:18 -0500 nothrow: use noexcept for C++11 and newer In C++11, the `throw()` specifier is deprecated and `noexcept` is preferred instead. src/liblzma/api/lzma.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit fb6d4f83cb6e144734f2a4216bb117bd56dc3cb5 Author: Lasse Collin Date: 2018-02-06 18:02:48 +0200 liblzma: Remove incorrect #ifdef from range_common.h. In most cases it was harmless but it could affect some custom build systems. Thanks to Pippijn van Steenhoven. src/liblzma/rangecoder/range_common.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit bc577d35c2d0ed17f554d2d8107b2a2a9abbac76 Author: Lasse Collin Date: 2018-01-10 22:10:39 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 713bbc1a80f26d34c96ed3dbb9887362204de3a1 Author: Lasse Collin Date: 2018-01-10 21:54:27 +0200 tuklib_integer: New Intel C compiler needs immintrin.h. Thanks to Melanie Blower (Intel) for the patch. 
src/common/tuklib_integer.h | 11 +++++++++++ 1 file changed, 11 insertions(+) commit a0ee1afbd99da138b559cb27fa2022e7f1ab44f3 Author: Lasse Collin Date: 2017-09-24 20:04:24 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit a1e2c568de29c0b57d873eab40a2879b749da429 Author: Lasse Collin Date: 2017-09-16 20:36:20 +0300 Windows: Fix paths in VS project files. Some paths use slashes instead of backslashes as directory separators... now it should work (I tested VS2013 version). windows/vs2013/liblzma.vcxproj | 12 ++++++------ windows/vs2013/liblzma_dll.vcxproj | 24 ++++++++++++------------ windows/vs2017/liblzma.vcxproj | 12 ++++++------ windows/vs2017/liblzma_dll.vcxproj | 24 ++++++++++++------------ 4 files changed, 36 insertions(+), 36 deletions(-) commit cea5cf8d26c9d1dc30a808614d79c0b25640e15e Author: Lasse Collin Date: 2017-09-16 12:56:20 +0300 Windows: Update VS2017 project files to include file info decoder. windows/vs2017/liblzma.vcxproj | 2 ++ windows/vs2017/liblzma_dll.vcxproj | 2 ++ 2 files changed, 4 insertions(+) commit 95d563db3ee497b223e522b699c4d4c29943eef0 Author: Lasse Collin Date: 2017-09-16 12:54:23 +0300 Windows: Add project files for VS2017. These files match the v5.2 branch (no file info decoder). windows/vs2017/config.h | 148 ++++++++++++++ windows/vs2017/liblzma.vcxproj | 355 ++++++++++++++++++++++++++++++++++ windows/vs2017/liblzma_dll.vcxproj | 384 +++++++++++++++++++++++++++++++++++++ windows/vs2017/xz_win.sln | 48 +++++ 4 files changed, 935 insertions(+) commit ab72416d62ea8f50ad31d5b8545fcb6a2bf96b73 Author: Lasse Collin Date: 2017-09-16 12:45:50 +0300 Windows: Update VS2013 project files to include file info decoder. windows/vs2013/liblzma.vcxproj | 2 ++ windows/vs2013/liblzma_dll.vcxproj | 2 ++ 2 files changed, 4 insertions(+) commit 82388980187b0e3794d187762054200bbdcc9a53 Author: Lasse Collin Date: 2017-09-16 12:39:43 +0300 Windows: Move VS2013 files into windows/vs2013 directory. 
windows/{ => vs2013}/config.h | 0 windows/{ => vs2013}/liblzma.vcxproj | 278 +++++++++++++++--------------- windows/{ => vs2013}/liblzma_dll.vcxproj | 280 +++++++++++++++---------------- windows/{ => vs2013}/xz_win.sln | 0 4 files changed, 279 insertions(+), 279 deletions(-) commit 94e3f986aa4e14b4ff01ac24857f499630d6d180 Author: Lasse Collin Date: 2017-08-14 20:08:33 +0300 Fix or hide warnings from GCC 7's -Wimplicit-fallthrough. src/liblzma/lzma/lzma_decoder.c | 6 ++++++ src/xz/list.c | 2 ++ 2 files changed, 8 insertions(+) commit 0b0e1e6803456aac641a59332200f8e95e2b7ea8 Author: Alexey Tourbin Date: 2017-05-16 23:56:35 +0300 Docs: Fix a typo in a comment in doc/examples/02_decompress.c. doc/examples/02_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a015cd1f90116e655be4eaf4aad42c4c911c2807 Author: Lasse Collin Date: 2017-05-23 18:34:43 +0300 xz: Fix "xz --list --robot missing_or_bad_file.xz". It ended up printing an uninitialized char-array when trying to print the check names (column 7) on the "totals" line. This also changes the column 12 (minimum xz version) to 50000002 (xz 5.0.0) instead of 0 when there are no valid input files. Thanks to kidmin for the bug report. src/xz/list.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) commit c2e29f06a7d1e3ba242ac2fafc69f5d6e92f62cd Author: Lasse Collin Date: 2017-04-24 20:20:11 +0300 Docs: Add doc/examples/11_file_info.c. doc/examples/11_file_info.c | 206 ++++++++++++++++++++++++++++++++++++++++++++ doc/examples/Makefile | 3 +- 2 files changed, 208 insertions(+), 1 deletion(-) commit 1520f6ec808896375ac7bf778c449e0f7dea5f46 Author: Lasse Collin Date: 2017-04-24 19:48:47 +0300 Build: Omit pre-5.0.0 entries from the generated ChangeLog. It makes ChangeLog significantly smaller. 
Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 8269782283806c90a8509c2ac2a308344f70e171 Author: Lasse Collin Date: 2017-04-24 19:48:23 +0300 xz: Use lzma_file_info_decoder() for --list. src/xz/list.c | 254 ++++++++++------------------------------------------------ 1 file changed, 44 insertions(+), 210 deletions(-) commit e353d0b1cc0d3997ae5048faa8e6786414953e06 Author: Lasse Collin Date: 2017-04-24 19:35:50 +0300 liblzma: Add lzma_file_info_decoder(). src/liblzma/api/lzma/index.h | 66 ++++ src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/file_info.c | 855 ++++++++++++++++++++++++++++++++++++++++ src/liblzma/liblzma.map | 7 +- 4 files changed, 928 insertions(+), 1 deletion(-) commit 144ef9e19e9496c995b21505dd1e111c442968d1 Author: Lasse Collin Date: 2017-04-24 19:30:22 +0300 Update the Git repository URL to HTTPS in ChangeLog. ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 8c9842c265993d7dd4039f732d3546267fb5ecc4 Author: Lasse Collin Date: 2017-04-21 15:05:16 +0300 liblzma: Rename LZMA_SEEK to LZMA_SEEK_NEEDED and seek_in to seek_pos. src/liblzma/api/lzma/base.h | 18 +++++++++--------- src/liblzma/common/common.c | 2 +- src/xz/message.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) commit 662b27c417cab248cb365dd7682121bdec4d5ae7 Author: Lasse Collin Date: 2017-04-19 22:17:35 +0300 Update the home page URLs to HTTPS. COPYING | 2 +- README | 2 +- configure.ac | 2 +- doc/faq.txt | 4 ++-- dos/config.h | 2 +- src/common/common_w32res.rc | 2 +- src/xz/xz.1 | 6 +++--- src/xzdec/xzdec.1 | 4 ++-- windows/README-Windows.txt | 2 +- windows/config.h | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) commit c28f0b3d00af87b92dda229831548d8eb0067d1d Author: Lasse Collin Date: 2017-04-05 18:47:22 +0300 xz: Add io_seek_src(). 
src/xz/file_io.c | 20 +++++++++++++++++--- src/xz/file_io.h | 13 +++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) commit bba477257d7319c8764890f3669175b866d24944 Author: Lasse Collin Date: 2017-03-30 22:01:54 +0300 xz: Use POSIX_FADV_RANDOM in "xz --list" mode. xz --list is random access so POSIX_FADV_SEQUENTIAL was clearly wrong. src/xz/file_io.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) commit 310d19816d1652b0c8bb1b82574d46345d924752 Author: Lasse Collin Date: 2017-03-30 20:03:05 +0300 liblzma: Make lzma_index_decoder_init() visible to other liblzma funcs. This is to allow other functions to use it without going via the public API (lzma_index_decoder()). src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/index_decoder.c | 10 +++++----- src/liblzma/common/index_decoder.h | 24 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) commit a27920002dbc469f778a134fc665b7c3ea73701b Author: Lasse Collin Date: 2017-03-30 20:00:09 +0300 liblzma: Add generic support for input seeking (LZMA_SEEK). Also mention LZMA_SEEK in xz/message.c to silence a warning. src/liblzma/api/lzma/base.h | 31 ++++++++++++++++++++++++++++++- src/liblzma/common/common.c | 12 +++++++++++- src/xz/message.c | 1 + 3 files changed, 42 insertions(+), 2 deletions(-) commit a0b1dda409bc3e6e2957a2651663fc411d2caf2d Author: Lasse Collin Date: 2017-03-30 19:47:45 +0300 liblzma: Fix lzma_memlimit_set(strm, 0). The 0 got treated specially in a buggy way and as a result the function did nothing. The API doc said that 0 was supposed to return LZMA_PROG_ERROR but it didn't. Now 0 is treated as if 1 had been specified. This is done because 0 is already used to indicate an error from lzma_memlimit_get() and lzma_memusage(). In addition, lzma_memlimit_set() no longer checks that the new limit is at least LZMA_MEMUSAGE_BASE. It's counter-productive for the Index decoder and was actually needed only by the auto decoder.
Auto decoder has now been modified to check for LZMA_MEMUSAGE_BASE. src/liblzma/api/lzma/base.h | 7 ++++++- src/liblzma/common/auto_decoder.c | 3 +++ src/liblzma/common/common.c | 6 ++++-- 3 files changed, 13 insertions(+), 3 deletions(-) commit 84462afaada61379f5878e46f8f00e25a1cdcf29 Author: Lasse Collin Date: 2017-03-30 19:16:55 +0300 liblzma: Similar memlimit fix for stream_, alone_, and auto_decoder. src/liblzma/api/lzma/container.h | 21 +++++++++++++++++---- src/liblzma/common/alone_decoder.c | 5 +---- src/liblzma/common/auto_decoder.c | 5 +---- src/liblzma/common/stream_decoder.c | 5 +---- 4 files changed, 20 insertions(+), 16 deletions(-) commit cbc74017939690d13441b8926bb743fb03211b83 Author: Lasse Collin Date: 2017-03-30 18:58:18 +0300 liblzma: Fix handling of memlimit == 0 in lzma_index_decoder(). It returned LZMA_PROG_ERROR, which was done to avoid zero as the limit (because it's a special value elsewhere), but using LZMA_PROG_ERROR is simply inconvenient and can cause bugs. The fix/workaround is to treat 0 as if it were 1 byte. It's effectively the same thing. The only weird consequence is that then lzma_memlimit_get() will return 1 even when 0 was specified as the limit. This fixes a very rare corner case in xz --list where a specific memory usage limit and a multi-stream file could print the error message "Internal error (bug)" instead of saying that the memory usage limit is too low. src/liblzma/api/lzma/index.h | 18 +++++++++++------- src/liblzma/common/index_decoder.c | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) commit 78ae13bced912b1b92ae927992c99cbcc463cae7 Author: Lasse Collin Date: 2016-12-30 13:25:10 +0200 Update NEWS for 5.2.3. NEWS | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) commit 0297863fdb453aed1a25eb025f3ba7bacbbb1357 Author: Lasse Collin Date: 2016-12-26 20:55:52 +0200 Document --enable-sandbox configure option in INSTALL. 
INSTALL | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) commit d4a0462abe5478193521c14625e1c81fead87f9f Author: Lasse Collin Date: 2016-11-21 20:24:50 +0200 liblzma: Avoid multiple definitions of lzma_coder structures. Only one definition was visible in a translation unit. It avoided a few casts and temp variables but seems that this hack doesn't work with link-time optimizations in compilers as it's not C99/C11 compliant. Fixes: http://www.mail-archive.com/xz-devel@tukaani.org/msg00279.html src/liblzma/common/alone_decoder.c | 44 +++++---- src/liblzma/common/alone_encoder.c | 34 ++++--- src/liblzma/common/auto_decoder.c | 35 ++++--- src/liblzma/common/block_decoder.c | 41 ++++---- src/liblzma/common/block_encoder.c | 40 ++++---- src/liblzma/common/common.h | 18 ++-- src/liblzma/common/index_decoder.c | 33 ++++--- src/liblzma/common/index_encoder.c | 16 ++-- src/liblzma/common/stream_decoder.c | 50 +++++----- src/liblzma/common/stream_encoder.c | 56 ++++++----- src/liblzma/common/stream_encoder_mt.c | 124 ++++++++++++++----------- src/liblzma/delta/delta_common.c | 25 ++--- src/liblzma/delta/delta_decoder.c | 6 +- src/liblzma/delta/delta_encoder.c | 12 ++- src/liblzma/delta/delta_private.h | 4 +- src/liblzma/lz/lz_decoder.c | 60 ++++++------ src/liblzma/lz/lz_decoder.h | 13 ++- src/liblzma/lz/lz_encoder.c | 57 +++++++----- src/liblzma/lz/lz_encoder.h | 9 +- src/liblzma/lzma/lzma2_decoder.c | 32 ++++--- src/liblzma/lzma/lzma2_encoder.c | 51 +++++----- src/liblzma/lzma/lzma_decoder.c | 27 +++--- src/liblzma/lzma/lzma_encoder.c | 29 +++--- src/liblzma/lzma/lzma_encoder.h | 9 +- src/liblzma/lzma/lzma_encoder_optimum_fast.c | 3 +- src/liblzma/lzma/lzma_encoder_optimum_normal.c | 23 ++--- src/liblzma/lzma/lzma_encoder_private.h | 6 +- src/liblzma/simple/arm.c | 2 +- src/liblzma/simple/armthumb.c | 2 +- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 2 +- src/liblzma/simple/simple_coder.c | 61 ++++++------ src/liblzma/simple/simple_private.h 
| 12 +-- src/liblzma/simple/sparc.c | 2 +- src/liblzma/simple/x86.c | 15 +-- 35 files changed, 532 insertions(+), 423 deletions(-) commit a01794c52add98263b49119842c3e7141d1b9ced Author: Lasse Collin Date: 2016-10-24 18:53:25 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit df8f446e3ad47e5148b8c8d8b6e519d3ce29cb9d Author: Lasse Collin Date: 2016-10-24 18:51:36 +0300 tuklib_cpucores: Add support for sched_getaffinity(). It's available in glibc (GNU/Linux, GNU/kFreeBSD). It's better than sysconf(_SC_NPROCESSORS_ONLN) because sched_getaffinity() gives the number of cores available to the process instead of the total number of cores online. As a side effect, this commit fixes a bug on GNU/kFreeBSD where configure would detect the FreeBSD-specific cpuset_getaffinity() but it wouldn't actually work because on GNU/kFreeBSD it requires using -lfreebsd-glue when linking. Now the glibc-specific function will be used instead. Thanks to Sebastian Andrzej Siewior for the original patch and testing. m4/tuklib_cpucores.m4 | 30 +++++++++++++++++++++++++++++- src/common/tuklib_cpucores.c | 9 +++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) commit 446e4318fa79788e09299d5953b5dd428953d14b Author: Lasse Collin Date: 2016-06-30 20:27:36 +0300 xz: Fix copying of timestamps on Windows. xz used to call utime() on Windows, but its result gets lost on close(). Using _futime() seems to work. Thanks to Martok for reporting the bug: http://www.mail-archive.com/xz-devel@tukaani.org/msg00261.html configure.ac | 2 +- src/xz/file_io.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) commit 1b0ac0c53c761263e91e34195cb21dfdcfeac0bd Author: Lasse Collin Date: 2016-06-16 22:46:02 +0300 xz: Silence warnings from -Wlogical-op. Thanks to Evan Nemerson. 
src/xz/file_io.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) commit c83b7a03342c3325ff10400b22ee21edfcd1e026 Author: Lasse Collin Date: 2016-04-10 20:55:49 +0300 Build: Fix = to += for xz_SOURCES in src/xz/Makefile.am. Thanks to Christian Kujau. src/xz/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ade31a2bfb95c94d05fbfc0ecbba5d6377f2506e Author: Lasse Collin Date: 2016-04-10 20:54:17 +0300 Build: Bump GNU Gettext version requirement to 0.19. It silences a few warnings and most people probably have 0.19 even on stable distributions. Thanks to Christian Kujau. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ac398c3bafa6e4c80e20571373a96947db863b3d Author: Lasse Collin Date: 2016-03-13 20:21:49 +0200 liblzma: Disable external SHA-256 by default. This is the sane thing to do. The conflict with OpenSSL on some OSes and especially that the OS-provided versions can be significantly slower makes it clear that it was a mistake to have the external SHA-256 support enabled by default. Those who want it can now pass --enable-external-sha256 to configure. INSTALL was updated with notes about OSes where this can be a bad idea. The SHA-256 detection code in configure.ac had some bugs that could lead to a build failure in some situations. These were fixed, although it doesn't matter that much now that the external SHA-256 is disabled by default. MINIX >= 3.2.0 uses NetBSD's libc and thus has SHA256_Init in libc instead of libutil. Support for the libutil version was removed. INSTALL | 36 ++++++++++++++++++++++ configure.ac | 76 +++++++++++++++++++++++------------------------ src/liblzma/check/check.h | 16 ++++------ 3 files changed, 79 insertions(+), 49 deletions(-) commit 6fd5ecb589a9fdd7a576ea48c4850d496bab9ce5 Author: Lasse Collin Date: 2016-03-10 20:27:05 +0200 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit 473ef0dc69a30e64d5fa0d34aca02f7309faa3e9 Author: Lasse Collin Date: 2016-03-10 20:26:49 +0200 Build: Avoid SHA256_Init on FreeBSD and MINIX 3. On FreeBSD 10 and older, SHA256_Init from libmd conflicts with libcrypto from OpenSSL. The OpenSSL version has different sizeof(SHA256_CTX) and it can cause weird problems if wrong SHA256_Init gets used. Looking at the source, MINIX 3 seems to have a similar issue but I'm not sure. To be safe, I disabled SHA256_Init on MINIX 3 too. NetBSD has SHA256_Init in libc and they had a similar problem, but they already fixed it in 2009. Thanks to Jim Wilcoxson for the bug report that helped in finding the problem. configure.ac | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) commit faf302137e54d605b44ecf0373cb51a6403a2de1 Author: Lasse Collin Date: 2015-11-08 20:16:10 +0200 tuklib_physmem: Hopefully silence a warning on Windows. src/common/tuklib_physmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit e52e9151cf8613022d1de4712ff39dbcb666e991 Author: Lasse Collin Date: 2015-11-04 23:17:43 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 14115f84a38161d55eaa2d070f08739bde37e966 Author: Lasse Collin Date: 2015-11-04 23:14:00 +0200 liblzma: Make Valgrind happier with optimized (gcc -O2) liblzma. When optimizing, GCC can reorder code so that an uninitialized value gets used in a comparison, which makes Valgrind unhappy. It doesn't happen when compiled with -O0, which I tend to use when running Valgrind. Thanks to Rich Prohaska. I remember this being mentioned long ago by someone else but nothing was done back then. src/liblzma/lz/lz_encoder.c | 4 ++++ 1 file changed, 4 insertions(+) commit f4c95ba94beb71a608eb6eadbf82b44f53a0260e Author: Lasse Collin Date: 2015-11-03 20:55:45 +0200 liblzma: Rename lzma_presets.c back to lzma_encoder_presets.c. 
It would be too annoying to update other build systems just because of this. src/liblzma/lzma/Makefile.inc | 2 +- src/liblzma/lzma/{lzma_presets.c => lzma_encoder_presets.c} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit c7bc20a6f3e71920871d48db31a79ab58b5a0a4b Author: Lasse Collin Date: 2015-11-03 20:47:07 +0200 Build: Disable xzdec, lzmadec, and lzmainfo when they cannot be built. They all need decoder support and if that isn't available, there's no point trying to build them. configure.ac | 3 +++ 1 file changed, 3 insertions(+) commit 5cbca1205deeb6fb7afe7a864fa68a57466d928a Author: Lasse Collin Date: 2015-11-03 20:35:19 +0200 Build: Simplify $enable_{encoders,decoders} usage a bit. configure.ac | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) commit af13781886c8e7a0aabebb5141ea282dc364f5c6 Author: Lasse Collin Date: 2015-11-03 20:31:31 +0200 Windows/MSVC: Update config.h. windows/config.h | 6 ++++++ 1 file changed, 6 insertions(+) commit 9fa5949330f162c5a2f6653f83025327837e8f39 Author: Lasse Collin Date: 2015-11-03 20:29:58 +0200 DOS: Update config.h. dos/config.h | 6 ++++++ 1 file changed, 6 insertions(+) commit cb3111e3ed84152912b5138d690c8d9f00c6ef02 Author: Lasse Collin Date: 2015-11-03 20:29:33 +0200 xz: Make xz buildable even when encoders or decoders are disabled. The patch is quite long but it's mostly about adding new #ifdefs to omit code when encoders or decoders have been disabled. This adds two new #defines to config.h: HAVE_ENCODERS and HAVE_DECODERS. configure.ac | 4 ++++ src/xz/Makefile.am | 8 ++++++-- src/xz/args.c | 16 ++++++++++++++++ src/xz/coder.c | 33 +++++++++++++++++++++++++-------- src/xz/main.c | 9 +++++++-- src/xz/private.h | 5 ++++- 6 files changed, 62 insertions(+), 13 deletions(-) commit 4cc584985c0b7a13901da1b7a64ef9f7cc36e8ab Author: Lasse Collin Date: 2015-11-03 18:06:40 +0200 Build: Build LZMA1/2 presets also when only decoder is wanted. 
People shouldn't rely on the presets when decoding raw streams, but xz uses the presets as the starting point for raw decoder options anyway. lzma_encoder_presets.c was renamed to lzma_presets.c to make it clear it's not used solely by the encoder code. src/liblzma/lzma/Makefile.inc | 6 +++++- src/liblzma/lzma/{lzma_encoder_presets.c => lzma_presets.c} | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) commit 23ed1d41489f632155bbc9660f323d57e09da180 Author: Lasse Collin Date: 2015-11-03 17:54:48 +0200 Build: Fix configure to handle LZMA1 dependency with LZMA2. Now it gives an error if LZMA1 encoder/decoder is missing when LZMA2 encoder/decoder was requested. Even better would be LZMA2 implicitly enabling LZMA1 but it would need more code. configure.ac | 5 ----- 1 file changed, 5 deletions(-) commit b0bc3e03852af13419ea2960881824258d451117 Author: Lasse Collin Date: 2015-11-03 17:41:54 +0200 Build: Don't omit lzma_cputhreads() unless using --disable-threads. Previously it was omitted if encoders were disabled with --disable-encoders. It didn't make sense and it also broke the build. src/liblzma/common/Makefile.inc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit c6bf438ab39e0fb4a47d3c81725c227919502726 Author: Lasse Collin Date: 2015-11-02 18:16:51 +0200 liblzma: Fix a build failure related to external SHA-256 support. If an appropriate header and structure were found by configure, but a library with a usable SHA-256 functions wasn't, the build failed. src/liblzma/check/check.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) commit e18adc56f2262aa9394d46681e9e4b9981ed5e97 Author: Lasse Collin Date: 2015-11-02 15:19:10 +0200 xz: Always close the file before trying to delete it. unlink() can return EBUSY in errno for open files on some operating systems and file systems.
src/xz/file_io.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) commit 282e768a1484e88c8b7ec35655ee4959954ec87a Author: Lasse Collin Date: 2015-10-12 21:08:42 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 372e402713a1d4337ffce5f56d5c5c9ed99a66d0 Author: Lasse Collin Date: 2015-10-12 21:07:41 +0300 Tests: Add tests for the two bugs fixed in index.c. tests/test_index.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) commit 21515d79d778b8730a434f151b07202d52a04611 Author: Lasse Collin Date: 2015-10-12 20:45:15 +0300 liblzma: Fix lzma_index_dup() for empty Streams. Stream Flags and Stream Padding weren't copied from empty Streams. src/liblzma/common/index.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) commit 09f395b6b360c0b13e8559eece1d179b908ebd3a Author: Lasse Collin Date: 2015-10-12 20:31:44 +0300 liblzma: Add a note to index.c for those using static analyzers. src/liblzma/common/index.c | 3 +++ 1 file changed, 3 insertions(+) commit 3bf857edfef51374f6f3fffae3d817f57d3264a0 Author: Lasse Collin Date: 2015-10-12 20:29:09 +0300 liblzma: Fix a memory leak in error path of lzma_index_dup(). lzma_index_dup() calls index_dup_stream() which, in case of an error, calls index_stream_end() to free memory allocated by index_stream_init(). However, it illogically didn't actually free the memory. To make it logical, the tree handling code was modified a bit in addition to changing index_stream_end(). Thanks to Evan Nemerson for the bug report. src/liblzma/common/index.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) commit 7f05803979b4b79642d5be4218a79da7a0b12c47 Author: Lasse Collin Date: 2015-09-29 13:57:28 +0300 Update NEWS for 5.2.2. 
NEWS | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 397fcc0946315b55c3c6d80e37e82a2a78bc15c1 Author: Hauke Henningsen Date: 2015-08-17 04:59:54 +0200 Update German translation, mostly wrt orthography Provide an update of the German translation. * A lot of compound words were previously written with spaces, while German orthography is relatively clear in that the components should not be separated. * When referring to the actual process of (de)compression rather than the concept, replace “(De-)Kompression” with “(De-)Komprimierung”. Previously, both forms were used in this context and are now used in a manner consistent with “Komprimierung” being more likely to refer to a process. * Consistently translate “standard input”/“output” * Use “Zeichen” instead of false friend “Charakter” for “character” * Insert commas around relative clauses (as required in German) * Some other minor corrections * Capitalize “ß” as “ẞ” * Consistently start option descriptions in --help with capital letters Acked-By: Andre Noll * Update after msgmerge po/de.po | 383 ++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 196 insertions(+), 187 deletions(-) commit cbc9e39bae715accb44168930a71888480aad569 Author: Lasse Collin Date: 2015-08-11 13:23:04 +0300 Build: Minor Cygwin cleanup. Some tests used "cygwin*" and some used "cygwin". I changed them all to use "cygwin". Shouldn't affect anything in practice. configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit bcacd8ce7a031566858e5e03c1009064c3f1c89e Author: Lasse Collin Date: 2015-08-11 13:21:52 +0300 Build: Support building of MSYS2 binaries. configure.ac | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) commit 0275a5398c01d57b724dec7fea52dec3bd6edc6c Author: Lasse Collin Date: 2015-08-09 21:06:26 +0300 Windows: Define DLL_EXPORT when building liblzma.dll with MSVC. src/liblzma/common/common.h uses it to set __declspec(dllexport) for the API symbols. 
Thanks to Adam Walling. windows/liblzma_dll.vcxproj | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit a74525cf9b945fb0b370e64cf406104beb31729b Author: Lasse Collin Date: 2015-08-09 21:02:20 +0300 Windows: Omit unneeded header files from MSVC project files. windows/liblzma.vcxproj | 5 ----- windows/liblzma_dll.vcxproj | 5 ----- 2 files changed, 10 deletions(-) commit fbbb295a91caf39faf8838c8c39526e4cb4dc121 Author: Lasse Collin Date: 2015-07-12 20:48:19 +0300 liblzma: A MSVC-specific hack isn't needed with MSVC 2013 and newer. src/liblzma/api/lzma.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) commit 713dbe5c230fe00865a54f5c32358ea30f9a1156 Author: Lasse Collin Date: 2015-06-19 20:38:55 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit 3a5d755d055d51f99c523b4c2952727e1e69cfa1 Author: Lasse Collin Date: 2015-06-19 20:21:30 +0300 Windows: Update the docs. INSTALL | 29 ++++++++----- windows/INSTALL-MSVC.txt | 47 ++++++++++++++++++++++ windows/{INSTALL-Windows.txt => INSTALL-MinGW.txt} | 2 +- 3 files changed, 67 insertions(+), 11 deletions(-) commit b0798c6aa6184efcefd0bdcca20f96121a13feda Author: Lasse Collin Date: 2015-06-19 17:25:31 +0300 Windows: Add MSVC project files for building liblzma. Thanks to Adam Walling for creating these files. windows/liblzma.vcxproj | 359 ++++++++++++++++++++++++++++++++++++++++ windows/liblzma_dll.vcxproj | 388 ++++++++++++++++++++++++++++++++++++++++++++ windows/xz_win.sln | 48 ++++++ 3 files changed, 795 insertions(+) commit 9b02a4ffdac1b9f066658ec4c95c0834f4cd2fb7 Author: Andre Noll Date: 2015-05-28 15:50:00 +0200 Fix typo in German translation. As pointed out by Robert Pollak, there's a typo in the German translation of the compression preset option (-0 ... -9) help text. "The compressor" translates to "der Komprimierer", and the genitive form is "des Komprimierers". The old word makes no sense at all. 
po/de.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c7f4041f6b8f4729f88d3bc888b2a4080ae51f72 Author: Lasse Collin Date: 2015-05-13 20:57:55 +0300 Tests: Fix a memory leak in test_bcj_exact_size. Thanks to Cristian Rodríguez. tests/test_bcj_exact_size.c | 1 + 1 file changed, 1 insertion(+) commit 17b29d4f0ae0f780fbd69e15a398dc478d8492f8 Author: Lasse Collin Date: 2015-05-12 18:08:24 +0300 Fix NEWS about threading in 5.2.0. Thanks to Andy Hochhaus. NEWS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 49c26920d6e2d85e5c6123e34958aed2e77485ad Author: Lasse Collin Date: 2015-05-11 21:26:16 +0300 xz: Document that threaded decompression hasn't been implemented yet. src/xz/xz.1 | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) commit 5b2458cb244ed237efe4de1ebcf06e1b3a1f4256 Author: Lasse Collin Date: 2015-04-20 20:20:29 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 6bd0349c58451b13442e8f463e35de83548bf985 Author: Lasse Collin Date: 2015-04-20 19:59:18 +0300 Revert "xz: Use pipe2() if available." This reverts commit 7a11c4a8e5e15f13d5fa59233b3172e65428efdd. It is a problem when libc has pipe2() but the kernel is too old to have pipe2() and thus pipe2() fails. In xz it's pointless to have a fallback for non-functioning pipe2(); it's better to avoid pipe2() completely. Thanks to Michael Fox for the bug report. configure.ac | 4 ++-- src/xz/file_io.c | 9 +-------- 2 files changed, 3 insertions(+), 10 deletions(-) commit fc0df0f8db87dff45543708a711e17d29c37f632 Author: Lasse Collin Date: 2015-04-01 14:45:25 +0300 xz: Fix the Capsicum rights on user_abort_pipe. src/xz/file_io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 57393615b31b3788dd77280452d845bcc12d33af Author: Lasse Collin Date: 2015-03-31 22:20:11 +0300 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit 1238381143a9a7ce84839c2582ccd56ff750a440 Author: Lasse Collin Date: 2015-03-31 22:19:34 +0300 xz: Add support for sandboxing with Capsicum. The sandboxing is used conditionally as described in main.c. This isn't optimal but it was much easier to implement than a full sandboxing solution and it still covers the most common use cases where xz is writing to standard output. This should have practically no effect on performance even with small files as fork() isn't needed. C and locale libraries can open files as needed. This has been fine in the past, but it's a problem with things like Capsicum. io_sandbox_enter() tries to ensure that various locale-related files have been loaded before cap_enter() is called, but it's possible that there are other similar problems which haven't been seen yet. Currently Capsicum is available on FreeBSD 10 and later and there is a port to Linux too. Thanks to Loganaden Velvindron for help. configure.ac | 41 +++++++++++++++++++++++++++ src/xz/Makefile.am | 2 +- src/xz/file_io.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/xz/file_io.h | 6 ++++ src/xz/main.c | 18 ++++++++++++ src/xz/private.h | 4 +++ 6 files changed, 151 insertions(+), 1 deletion(-) commit 29a087fb5a0c879f0b1bc4c6b989f7b87bacdf9e Author: Lasse Collin Date: 2015-03-31 21:12:30 +0300 Fix bugs and otherwise improve ax_check_capsicum.m4. AU_ALIAS was removed because the new version is incompatible with the old version. It no longer checks for <sys/capability.h> separately. It's enough to test for it as part of AC_CHECK_DECL. The defines HAVE_CAPSICUM_SYS_CAPSICUM_H and HAVE_CAPSICUM_SYS_CAPABILITY_H were removed as unneeded. HAVE_SYS_CAPSICUM_H from AC_CHECK_HEADERS is enough. It no longer does a useless search for the Capsicum library if the header wasn't found. Fixed a bug in ACTION-IF-FOUND (the first argument). Specifying the argument omitted the default action but the given action wasn't used instead.
AC_DEFINE([HAVE_CAPSICUM]) is now always called when Capsicum support is found. Previously it was part of the default ACTION-IF-FOUND which a custom action would override. Now the default action only prepends ${CAPSICUM_LIB} to LIBS. The documentation was updated. Since there was no serial number, "#serial 2" was added. m4/ax_check_capsicum.m4 | 103 ++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 52 deletions(-) commit 6e845c6a3eddf2fde9db5a29950421dff60a43ac Author: Lasse Collin Date: 2015-03-31 19:20:24 +0300 Add m4/ax_check_capsicum.m4 for detecting Capsicum support. The file was loaded from this web page: https://github.com/google/capsicum-test/blob/dev/autoconf/m4/ax_check_capsicum.m4 Thanks to Loganaden Velvindron for pointing it out for me. m4/ax_check_capsicum.m4 | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) commit 3717885f9ef2c06f1bcbad9f4c2ed2d5695f844e Author: Lasse Collin Date: 2015-03-30 22:44:02 +0300 Bump version to 5.3.0alpha and soname to 5.3.99. The idea of 99 is that it looks a bit weird in this context. For new features there's no API/ABI stability in devel versions. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) commit eccd8155e107c5ada03d13e7730675cdf1a44ddc Author: Lasse Collin Date: 2015-03-29 22:14:47 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 25263fd9e7a8a913395cb93d7c104cd48c2b4a00 Author: Lasse Collin Date: 2015-03-29 22:13:48 +0300 Fix the detection of installed RAM on QNX. The earlier version compiled but didn't actually work since sysconf(_SC_PHYS_PAGES) always fails (or so I was told). Thanks to Ole André Vadla Ravnås for the patch and testing.
m4/tuklib_physmem.m4 | 6 +++--- src/common/tuklib_physmem.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) commit 4c544d2410903d38402221cb783ed85585b6a007 Author: Lasse Collin Date: 2015-03-27 22:39:07 +0200 Fix CPU core count detection on QNX. It tried to use sysctl() on QNX but - it broke the build because sysctl() needs -lsocket on QNX; - sysctl() doesn't work for detecting the core count on QNX even if it compiled. sysconf() works. An alternative would have been to use QNX-specific SYSPAGE_ENTRY(num_cpu) from <sys/syspage.h>. Thanks to Ole André Vadla Ravnås. m4/tuklib_cpucores.m4 | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) commit e0ea6737b03e83ccaff4514d00e31bb926f8f0f3 Author: Lasse Collin Date: 2015-03-07 22:05:57 +0200 xz: size_t/uint32_t cleanup in options.c. src/xz/options.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit 8bcca29a65335fd679c13814b70b35b68fa5daed Author: Lasse Collin Date: 2015-03-07 22:04:23 +0200 xz: Fix a comment and silence a warning in message.c. src/xz/message.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit f243f5f44c6b19a7c289a0ec73a03ee08364cb5b Author: Lasse Collin Date: 2015-03-07 22:01:00 +0200 liblzma: Silence more uint32_t vs. size_t warnings. src/liblzma/lz/lz_encoder.c | 2 +- src/liblzma/lzma/lzma_encoder.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 7f0a4c50f4a374c40acf4b86848f301ad1e82d34 Author: Lasse Collin Date: 2015-03-07 19:54:00 +0200 xz: Make arg_count an unsigned int to silence a warning. Actually the value of arg_count cannot exceed INT_MAX but it's nicer as an unsigned int. src/xz/args.h | 2 +- src/xz/main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit f6ec46801588b1be29c07c9db98558b521304002 Author: Lasse Collin Date: 2015-03-07 19:33:17 +0200 liblzma: Fix a warning in index.c.
src/liblzma/common/index.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit a24518971cc621315af142dd3bb7614fab04ad27 Author: Lasse Collin Date: 2015-02-26 20:46:14 +0200 Build: Fix a CR+LF problem when running autoreconf -fi on OS/2. build-aux/version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit dec11497a71518423b5ff0e759100cf8aadf6c7b Author: Lasse Collin Date: 2015-02-26 16:53:44 +0200 Bump version and soname for 5.2.1. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 29e39c79975ab89ee5dd671e97064534a9f3a649 Author: Lasse Collin Date: 2015-02-26 13:01:09 +0200 Update NEWS for 5.2.1. NEWS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) commit 7a11c4a8e5e15f13d5fa59233b3172e65428efdd Author: Lasse Collin Date: 2015-02-22 19:38:48 +0200 xz: Use pipe2() if available. configure.ac | 4 ++-- src/xz/file_io.c | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) commit 117d962685c72682c63edc9bb765367189800202 Author: Lasse Collin Date: 2015-02-21 23:40:26 +0200 liblzma: Fix a compression-ratio regression in LZMA1/2 in fast mode. The bug was added in the commit f48fce093b07aeda95c18850f5e086d9f2383380 and thus affected 5.1.4beta and 5.2.0. Luckily the bug cannot cause data corruption or other nasty things. src/liblzma/lzma/lzma_encoder_optimum_fast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ae984e31c167d3bc52972ec422dd1ebd5f5d5719 Author: Lasse Collin Date: 2015-02-21 23:00:19 +0200 xz: Fix the fcntl() usage when creating a pipe for the self-pipe trick. Now it reads the old flags instead of blindly setting O_NONBLOCK. The old code may have worked correctly, but this is better. src/xz/file_io.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) commit 2205bb5853098aea36a56df6f5747037175f66b4 Author: Lasse Collin Date: 2015-02-10 15:29:34 +0200 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit d935b0cdf3db440269b9d952b2b281b18f8c7b08 Author: Lasse Collin Date: 2015-02-10 15:28:30 +0200 tuklib_cpucores: Use cpuset_getaffinity() on FreeBSD if available. In FreeBSD, cpuset_getaffinity() is the preferred way to get the number of available cores. Thanks to Rui Paulo for the patch. I edited it slightly, but hopefully I didn't break anything. m4/tuklib_cpucores.m4 | 23 ++++++++++++++++++++++- src/common/tuklib_cpucores.c | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) commit eb61bc58c20769cac4d05f363b9c0e8c9c71a560 Author: Lasse Collin Date: 2015-02-09 22:08:37 +0200 xzdiff: Make the mktemp usage compatible with FreeBSD's mktemp. Thanks to Rui Paulo for the fix. src/scripts/xzdiff.in | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit b9a5b6b7a29029680af733082b6a46e0fc01623a Author: Lasse Collin Date: 2015-02-03 21:45:53 +0200 Add a few casts to tuklib_integer.h to silence possible warnings. I heard that Visual Studio 2013 gave warnings without the casts. Thanks to Gabi Davar. src/common/tuklib_integer.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) commit c45757135f40e4a0de730ba5fff0100219493982 Author: Lasse Collin Date: 2015-01-26 21:24:39 +0200 liblzma: Set LZMA_MEMCMPLEN_EXTRA depending on the compare method. src/liblzma/common/memcmplen.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) commit 3c500174ed5485f550972a2a6109c361e875f069 Author: Lasse Collin Date: 2015-01-26 20:40:16 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit fec88d41e672d9e197c9442aecf02bd0dfa6d516 Author: Lasse Collin Date: 2015-01-26 20:39:28 +0200 liblzma: Silence harmless Valgrind errors. Thanks to Torsten Rupp for reporting this. I had forgotten to run Valgrind before the 5.2.0 release. 
src/liblzma/lz/lz_encoder.c | 6 ++++++ 1 file changed, 6 insertions(+) commit a9b45badfec0928d20a27c7176c005fa637f7d1e Author: Lasse Collin Date: 2015-01-09 21:50:19 +0200 xz: Fix comments. src/xz/file_io.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) commit 541aee6dd4aa97a809aba281475a21b641bb89e2 Author: Lasse Collin Date: 2015-01-09 21:35:06 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4170edc914655310d2363baccf5e615e09b04911 Author: Lasse Collin Date: 2015-01-09 21:34:06 +0200 xz: Don't fail if stdout doesn't support O_NONBLOCK. This is similar to the case with stdin. Thanks to Brad Smith for the bug report and testing on OpenBSD. src/xz/file_io.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) commit 04bbc0c2843c50c8ad1cba42b937118e38b0508d Author: Lasse Collin Date: 2015-01-07 19:18:20 +0200 xz: Fix a memory leak in DOS-specific code. src/xz/file_io.c | 2 ++ 1 file changed, 2 insertions(+) commit f0f1f6c7235ffa901cf76fe18e33749e200b3eea Author: Lasse Collin Date: 2015-01-07 19:08:06 +0200 xz: Don't fail if stdin doesn't support O_NONBLOCK. It's a problem at least on OpenBSD which doesn't support O_NONBLOCK on e.g. /dev/null. I'm not surprised if it's a problem on other OSes too since this behavior is allowed in POSIX-1.2008. The code relying on this behavior was committed in June 2013 and included in 5.1.3alpha released on 2013-10-26. Clearly the development releases only get limited testing. src/xz/file_io.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) commit d2d484647d9d9d679f03c75abb0404f67069271c Author: Lasse Collin Date: 2015-01-06 20:30:15 +0200 Tests: Don't hide unexpected error messages in test_files.sh. Hiding them makes no sense since normally there's no error when testing the "good" files. With "bad" files errors are expected and then it makes sense to keep the messages hidden. 
tests/test_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit aae6a6aeda51cf94a47e39ad624728f9bee75e30 Author: Lasse Collin Date: 2014-12-30 11:17:16 +0200 Update Solaris notes in INSTALL. Mention the possible "make check" failure on Solaris in the Solaris-specific section of INSTALL. It was already in section 4.5 but it is better mention it in the OS-specific section too. INSTALL | 4 ++++ 1 file changed, 4 insertions(+) commit 7815112153178800a3521b9f31960e7cdc26cfba Author: Lasse Collin Date: 2014-12-26 12:00:05 +0200 Build: POSIX shell isn't required if scripts are disabled. INSTALL | 3 ++- configure.ac | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) commit a0cd05ee71d330b79ead6eb9222e1b24e1559d3a Author: Lasse Collin Date: 2014-12-21 20:48:37 +0200 DOS: Update Makefile. dos/Makefile | 1 + 1 file changed, 1 insertion(+) commit b85ee0905ec4ab7656d22e63519fdd3bedb21f2e Author: Lasse Collin Date: 2014-12-21 19:50:38 +0200 Windows: Fix bin_i486 to bin_i686 in build.bash. windows/build.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit cbafa710918195dbba3db02c3fab4f0538235206 Author: Lasse Collin Date: 2014-12-21 18:58:44 +0200 Docs: Use lzma_cputhreads() in 04_compress_easy_mt.c. doc/examples/04_compress_easy_mt.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) commit 8dbb57238d372c7263cfeb3e7f7fd9a73173156a Author: Lasse Collin Date: 2014-12-21 18:56:44 +0200 Docs: Update docs/examples/00_README.txt. doc/examples/00_README.txt | 4 ++++ 1 file changed, 4 insertions(+) commit 6060f7dc76fd6c2a8a1f8e85d0e4d86bb78273e6 Author: Lasse Collin Date: 2014-12-21 18:11:17 +0200 Bump version and soname for 5.2.0. I know that soname != app version, but I skip AGE=1 in -version-info to make the soname match the liblzma version anyway. It doesn't hurt anything as long as it doesn't conflict with library versioning rules. 
src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 6 +++--- src/liblzma/liblzma.map | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/THANKS b/THANKS index 94055dab7872..e7b62b33312f 100644 --- a/THANKS +++ b/THANKS @@ -1,160 +1,163 @@ Thanks ====== Some people have helped more, some less, but nevertheless everyone's help has been important. :-) In alphabetical order: - Mark Adler - H. Peter Anvin - Jeff Bastian - Nelson H. F. Beebe - Karl Beldan - Karl Berry - Anders F. Björklund - Emmanuel Blot - Melanie Blower - Alexander Bluhm - Martin Blumenstingl - Ben Boeckel - Jakub Bogusz - Adam Borowski - Maarten Bosmans - Trent W. Buck - Kevin R. Bulgrien - James Buren - David Burklund - Daniel Mealha Cabrita - Milo Casagrande - Marek Černocký - Tomer Chachamu - Vitaly Chikunov - Antoine Cœur - Gabi Davar + - İhsan Doğan - Chris Donawa - Andrew Dudman - Markus Duft - İsmail Dönmez - Paul Eggert - Robert Elz - Gilles Espinasse - Denis Excoffier - Michael Felt - Michael Fox - Mike Frysinger - Daniel Richard G. - Tomasz Gajc - Bjarni Ingi Gislason - John Paul Adrian Glaubitz - Bill Glessner - Michał Górny - Jason Gorski - Juan Manuel Guerrero - Diederik de Haas - Joachim Henke - Christian Hesse - Vincenzo Innocente - Peter Ivanov - Nicholas Jackson - Sam James + - Hajin Jang - Jouk Jansen - Jun I Jin - Kiyoshi Kanazawa - Per Øyvind Karlsen + - Iouri Kharon - Thomas Klausner - Richard Koch - Ville Koskinen - Jan Kratochvil - Christian Kujau - Stephan Kulow - Ilya Kurdyukov - Peter Lawler - James M Leddy - Vincent Lefevre - Hin-Tak Leung - Andraž 'ruskie' Levstik - Cary Lewis - Wim Lewis - Xin Li - Eric Lindblad - Lorenzo De Liso - H.J. Lu - Bela Lubkin - Gregory Margo - Julien Marrec - Ed Maste - Martin Matuška - Ivan A. 
Melnikov - Jim Meyering - Arkadiusz Miskiewicz - Nathan Moinvaziri - Étienne Mollier - Conley Moorhous - Rafał Mużyło - Adrien Nader - Evan Nemerson - Hongbo Ni - Jonathan Nieder - Andre Noll - Peter O'Gorman - Daniel Packard - Filip Palian - Peter Pallinger - Rui Paulo - Igor Pavlov - Diego Elio Pettenò - Elbert Pol - Mikko Pouru - Rich Prohaska - Trần Ngọc Quân - Pavel Raiskup - Ole André Vadla Ravnås - Eric S. Raymond - Robert Readman - Bernhard Reutner-Fischer - Markus Rickert - Cristian Rodríguez - Christian von Roques - Boud Roukema - Torsten Rupp - Stephen Sachs - Jukka Salmi - Alexandre Sauvé - Benno Schulenberg - Andreas Schwab - Bhargava Shastry - Dan Shechter - Stuart Shelton - Sebastian Andrzej Siewior - Ville Skyttä - Brad Smith - Bruce Stark - Pippijn van Steenhoven - Jonathan Stott - Dan Stromberg - Jia Tan - Vincent Torri - Paul Townsend - Mohammed Adnène Trojette - Alexey Tourbin - Loganaden Velvindron - Patrick J. Volkerding - Martin Väth - Adam Walling - Jeffrey Walton - Christian Weisgerber - Dan Weiss - Bert Wesarg - Fredrik Wikstrom - Jim Wilcoxson - Ralf Wildenhues - Charles Wilson - Lars Wirzenius - Pilorz Wojciech - Ryan Young - Andreas Zieringer Also thanks to all the people who have participated in the Tukaani project. I have probably forgot to add some names to the above list. Sorry about that and thanks for your help. diff --git a/src/common/mythread.h b/src/common/mythread.h index 413821836020..37b5216b7c14 100644 --- a/src/common/mythread.h +++ b/src/common/mythread.h @@ -1,522 +1,528 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file mythread.h /// \brief Some threading related helper macros and functions // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #ifndef MYTHREAD_H #define MYTHREAD_H #include "sysdefs.h" // If any type of threading is enabled, #define MYTHREAD_ENABLED. #if defined(MYTHREAD_POSIX) || defined(MYTHREAD_WIN95) \ || defined(MYTHREAD_VISTA) # define MYTHREAD_ENABLED 1 #endif #ifdef MYTHREAD_ENABLED //////////////////////////////////////// // Shared between all threading types // //////////////////////////////////////// // Locks a mutex for a duration of a block. // // Perform mythread_mutex_lock(&mutex) in the beginning of a block // and mythread_mutex_unlock(&mutex) at the end of the block. "break" // may be used to unlock the mutex and jump out of the block. // mythread_sync blocks may be nested. // // Example: // // mythread_sync(mutex) { // foo(); // if (some_error) // break; // Skips bar() // bar(); // } // // At least GCC optimizes the loops completely away so it doesn't slow // things down at all compared to plain mythread_mutex_lock(&mutex) // and mythread_mutex_unlock(&mutex) calls. // #define mythread_sync(mutex) mythread_sync_helper1(mutex, __LINE__) #define mythread_sync_helper1(mutex, line) mythread_sync_helper2(mutex, line) #define mythread_sync_helper2(mutex, line) \ for (unsigned int mythread_i_ ## line = 0; \ mythread_i_ ## line \ ? (mythread_mutex_unlock(&(mutex)), 0) \ : (mythread_mutex_lock(&(mutex)), 1); \ mythread_i_ ## line = 1) \ for (unsigned int mythread_j_ ## line = 0; \ !mythread_j_ ## line; \ mythread_j_ ## line = 1) #endif #if !defined(MYTHREAD_ENABLED) ////////////////// // No threading // ////////////////// // Calls the given function once. This isn't thread safe. #define mythread_once(func) \ do { \ static bool once_ = false; \ if (!once_) { \ func(); \ once_ = true; \ } \ } while (0) #if !(defined(_WIN32) && !defined(__CYGWIN__)) // Use sigprocmask() to set the signal mask in single-threaded programs. 
#include static inline void mythread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict oset) { int ret = sigprocmask(how, set, oset); assert(ret == 0); (void)ret; } #endif #elif defined(MYTHREAD_POSIX) //////////////////// // Using pthreads // //////////////////// -#include #include #include #include #include +// If clock_gettime() isn't available, use gettimeofday() from +// as a fallback. gettimeofday() is in SUSv2 and thus is supported on all +// relevant POSIX systems. +#ifndef HAVE_CLOCK_GETTIME +# include +#endif + #define MYTHREAD_RET_TYPE void * #define MYTHREAD_RET_VALUE NULL typedef pthread_t mythread; typedef pthread_mutex_t mythread_mutex; typedef struct { pthread_cond_t cond; #ifdef HAVE_CLOCK_GETTIME // Clock ID (CLOCK_REALTIME or CLOCK_MONOTONIC) associated with // the condition variable. clockid_t clk_id; #endif } mythread_cond; typedef struct timespec mythread_condtime; // Calls the given function once in a thread-safe way. #define mythread_once(func) \ do { \ static pthread_once_t once_ = PTHREAD_ONCE_INIT; \ pthread_once(&once_, &func); \ } while (0) // Use pthread_sigmask() to set the signal mask in multi-threaded programs. // Do nothing on OpenVMS since it lacks pthread_sigmask(). static inline void mythread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict oset) { #ifdef __VMS (void)how; (void)set; (void)oset; #else int ret = pthread_sigmask(how, set, oset); assert(ret == 0); (void)ret; #endif } // Creates a new thread with all signals blocked. Returns zero on success // and non-zero on error. static inline int mythread_create(mythread *thread, void *(*func)(void *arg), void *arg) { sigset_t old; sigset_t all; sigfillset(&all); mythread_sigmask(SIG_SETMASK, &all, &old); const int ret = pthread_create(thread, NULL, func, arg); mythread_sigmask(SIG_SETMASK, &old, NULL); return ret; } // Joins a thread. Returns zero on success and non-zero on error. 
static inline int mythread_join(mythread thread) { return pthread_join(thread, NULL); } // Initiatlizes a mutex. Returns zero on success and non-zero on error. static inline int mythread_mutex_init(mythread_mutex *mutex) { return pthread_mutex_init(mutex, NULL); } static inline void mythread_mutex_destroy(mythread_mutex *mutex) { int ret = pthread_mutex_destroy(mutex); assert(ret == 0); (void)ret; } static inline void mythread_mutex_lock(mythread_mutex *mutex) { int ret = pthread_mutex_lock(mutex); assert(ret == 0); (void)ret; } static inline void mythread_mutex_unlock(mythread_mutex *mutex) { int ret = pthread_mutex_unlock(mutex); assert(ret == 0); (void)ret; } // Initializes a condition variable. // // Using CLOCK_MONOTONIC instead of the default CLOCK_REALTIME makes the // timeout in pthread_cond_timedwait() work correctly also if system time // is suddenly changed. Unfortunately CLOCK_MONOTONIC isn't available // everywhere while the default CLOCK_REALTIME is, so the default is // used if CLOCK_MONOTONIC isn't available. // // If clock_gettime() isn't available at all, gettimeofday() will be used. static inline int mythread_cond_init(mythread_cond *mycond) { #ifdef HAVE_CLOCK_GETTIME - // NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1. -# if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && HAVE_DECL_CLOCK_MONOTONIC +# if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && \ + defined(HAVE_CLOCK_MONOTONIC) struct timespec ts; pthread_condattr_t condattr; // POSIX doesn't seem to *require* that pthread_condattr_setclock() // will fail if given an unsupported clock ID. Test that // CLOCK_MONOTONIC really is supported using clock_gettime(). 
if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0 && pthread_condattr_init(&condattr) == 0) { int ret = pthread_condattr_setclock( &condattr, CLOCK_MONOTONIC); if (ret == 0) ret = pthread_cond_init(&mycond->cond, &condattr); pthread_condattr_destroy(&condattr); if (ret == 0) { mycond->clk_id = CLOCK_MONOTONIC; return 0; } } // If anything above fails, fall back to the default CLOCK_REALTIME. // POSIX requires that all implementations of clock_gettime() must // support at least CLOCK_REALTIME. # endif mycond->clk_id = CLOCK_REALTIME; #endif return pthread_cond_init(&mycond->cond, NULL); } static inline void mythread_cond_destroy(mythread_cond *cond) { int ret = pthread_cond_destroy(&cond->cond); assert(ret == 0); (void)ret; } static inline void mythread_cond_signal(mythread_cond *cond) { int ret = pthread_cond_signal(&cond->cond); assert(ret == 0); (void)ret; } static inline void mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) { int ret = pthread_cond_wait(&cond->cond, mutex); assert(ret == 0); (void)ret; } // Waits on a condition or until a timeout expires. If the timeout expires, // non-zero is returned, otherwise zero is returned. static inline int mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, const mythread_condtime *condtime) { int ret = pthread_cond_timedwait(&cond->cond, mutex, condtime); assert(ret == 0 || ret == ETIMEDOUT); return ret; } // Sets condtime to the absolute time that is timeout_ms milliseconds // in the future. The type of the clock to use is taken from cond. 
static inline void mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, uint32_t timeout_ms) { condtime->tv_sec = timeout_ms / 1000; condtime->tv_nsec = (timeout_ms % 1000) * 1000000; #ifdef HAVE_CLOCK_GETTIME struct timespec now; int ret = clock_gettime(cond->clk_id, &now); assert(ret == 0); (void)ret; condtime->tv_sec += now.tv_sec; condtime->tv_nsec += now.tv_nsec; #else (void)cond; struct timeval now; gettimeofday(&now, NULL); condtime->tv_sec += now.tv_sec; condtime->tv_nsec += now.tv_usec * 1000L; #endif // tv_nsec must stay in the range [0, 999_999_999]. if (condtime->tv_nsec >= 1000000000L) { condtime->tv_nsec -= 1000000000L; ++condtime->tv_sec; } } #elif defined(MYTHREAD_WIN95) || defined(MYTHREAD_VISTA) ///////////////////// // Windows threads // ///////////////////// #define WIN32_LEAN_AND_MEAN #ifdef MYTHREAD_VISTA # undef _WIN32_WINNT # define _WIN32_WINNT 0x0600 #endif #include #include #define MYTHREAD_RET_TYPE unsigned int __stdcall #define MYTHREAD_RET_VALUE 0 typedef HANDLE mythread; typedef CRITICAL_SECTION mythread_mutex; #ifdef MYTHREAD_WIN95 typedef HANDLE mythread_cond; #else typedef CONDITION_VARIABLE mythread_cond; #endif typedef struct { // Tick count (milliseconds) in the beginning of the timeout. // NOTE: This is 32 bits so it wraps around after 49.7 days. // Multi-day timeouts may not work as expected. DWORD start; // Length of the timeout in milliseconds. The timeout expires // when the current tick count minus "start" is equal or greater // than "timeout". DWORD timeout; } mythread_condtime; // mythread_once() is only available with Vista threads. #ifdef MYTHREAD_VISTA #define mythread_once(func) \ do { \ static INIT_ONCE once_ = INIT_ONCE_STATIC_INIT; \ BOOL pending_; \ if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \ abort(); \ if (pending_) { \ func(); \ if (!InitOnceComplete(&once, 0, NULL)) \ abort(); \ } \ } while (0) #endif // mythread_sigmask() isn't available on Windows. 
Even a dummy version would // make no sense because the other POSIX signal functions are missing anyway. static inline int mythread_create(mythread *thread, unsigned int (__stdcall *func)(void *arg), void *arg) { uintptr_t ret = _beginthreadex(NULL, 0, func, arg, 0, NULL); if (ret == 0) return -1; *thread = (HANDLE)ret; return 0; } static inline int mythread_join(mythread thread) { int ret = 0; if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) ret = -1; if (!CloseHandle(thread)) ret = -1; return ret; } static inline int mythread_mutex_init(mythread_mutex *mutex) { InitializeCriticalSection(mutex); return 0; } static inline void mythread_mutex_destroy(mythread_mutex *mutex) { DeleteCriticalSection(mutex); } static inline void mythread_mutex_lock(mythread_mutex *mutex) { EnterCriticalSection(mutex); } static inline void mythread_mutex_unlock(mythread_mutex *mutex) { LeaveCriticalSection(mutex); } static inline int mythread_cond_init(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 *cond = CreateEvent(NULL, FALSE, FALSE, NULL); return *cond == NULL ? -1 : 0; #else InitializeConditionVariable(cond); return 0; #endif } static inline void mythread_cond_destroy(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 CloseHandle(*cond); #else (void)cond; #endif } static inline void mythread_cond_signal(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 SetEvent(*cond); #else WakeConditionVariable(cond); #endif } static inline void mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) { #ifdef MYTHREAD_WIN95 LeaveCriticalSection(mutex); WaitForSingleObject(*cond, INFINITE); EnterCriticalSection(mutex); #else BOOL ret = SleepConditionVariableCS(cond, mutex, INFINITE); assert(ret); (void)ret; #endif } static inline int mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, const mythread_condtime *condtime) { #ifdef MYTHREAD_WIN95 LeaveCriticalSection(mutex); #endif DWORD elapsed = GetTickCount() - condtime->start; DWORD timeout = elapsed >= condtime->timeout ? 
0 : condtime->timeout - elapsed; #ifdef MYTHREAD_WIN95 DWORD ret = WaitForSingleObject(*cond, timeout); assert(ret == WAIT_OBJECT_0 || ret == WAIT_TIMEOUT); EnterCriticalSection(mutex); return ret == WAIT_TIMEOUT; #else BOOL ret = SleepConditionVariableCS(cond, mutex, timeout); assert(ret || GetLastError() == ERROR_TIMEOUT); return !ret; #endif } static inline void mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, uint32_t timeout) { (void)cond; condtime->start = GetTickCount(); condtime->timeout = timeout; } #endif #endif diff --git a/src/common/sysdefs.h b/src/common/sysdefs.h index df7ecf400289..97be4ee380d0 100644 --- a/src/common/sysdefs.h +++ b/src/common/sysdefs.h @@ -1,199 +1,189 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file sysdefs.h /// \brief Common includes, definitions, system-specific things etc. /// /// This file is used also by the lzma command line tool, that's why this /// file is separate from common.h. // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_SYSDEFS_H #define LZMA_SYSDEFS_H ////////////// // Includes // ////////////// #ifdef HAVE_CONFIG_H # include #endif // Get standard-compliant stdio functions under MinGW and MinGW-w64. #ifdef __MINGW32__ # define __USE_MINGW_ANSI_STDIO 1 #endif // size_t and NULL #include #ifdef HAVE_INTTYPES_H # include #endif // C99 says that inttypes.h always includes stdint.h, but some systems // don't do that, and require including stdint.h separately. #ifdef HAVE_STDINT_H # include #endif // Some pre-C99 systems have SIZE_MAX in limits.h instead of stdint.h. The // limits are also used to figure out some macros missing from pre-C99 systems. #include // Be more compatible with systems that have non-conforming inttypes.h. 
// We assume that int is 32-bit and that long is either 32-bit or 64-bit. // Full Autoconf test could be more correct, but this should work well enough. // Note that this duplicates some code from lzma.h, but this is better since // we can work without inttypes.h thanks to Autoconf tests. #ifndef UINT32_C # if UINT_MAX != 4294967295U # error UINT32_C is not defined and unsigned int is not 32-bit. # endif # define UINT32_C(n) n ## U #endif #ifndef UINT32_MAX # define UINT32_MAX UINT32_C(4294967295) #endif #ifndef PRIu32 # define PRIu32 "u" #endif #ifndef PRIx32 # define PRIx32 "x" #endif #ifndef PRIX32 # define PRIX32 "X" #endif #if ULONG_MAX == 4294967295UL # ifndef UINT64_C # define UINT64_C(n) n ## ULL # endif # ifndef PRIu64 # define PRIu64 "llu" # endif # ifndef PRIx64 # define PRIx64 "llx" # endif # ifndef PRIX64 # define PRIX64 "llX" # endif #else # ifndef UINT64_C # define UINT64_C(n) n ## UL # endif # ifndef PRIu64 # define PRIu64 "lu" # endif # ifndef PRIx64 # define PRIx64 "lx" # endif # ifndef PRIX64 # define PRIX64 "lX" # endif #endif #ifndef UINT64_MAX # define UINT64_MAX UINT64_C(18446744073709551615) #endif // Incorrect(?) SIZE_MAX: // - Interix headers typedef size_t to unsigned long, // but a few lines later define SIZE_MAX to INT32_MAX. // - SCO OpenServer (x86) headers typedef size_t to unsigned int // but define SIZE_MAX to INT32_MAX. #if defined(__INTERIX) || defined(_SCO_DS) # undef SIZE_MAX #endif // The code currently assumes that size_t is either 32-bit or 64-bit. #ifndef SIZE_MAX # if SIZEOF_SIZE_T == 4 # define SIZE_MAX UINT32_MAX # elif SIZEOF_SIZE_T == 8 # define SIZE_MAX UINT64_MAX # else # error size_t is not 32-bit or 64-bit # endif #endif #if SIZE_MAX != UINT32_MAX && SIZE_MAX != UINT64_MAX # error size_t is not 32-bit or 64-bit #endif #include #include -// Pre-C99 systems lack stdbool.h. All the code in LZMA Utils must be written +// Pre-C99 systems lack stdbool.h. 
All the code in XZ Utils must be written // so that it works with fake bool type, for example: // // bool foo = (flags & 0x100) != 0; // bool bar = !!(flags & 0x100); // // This works with the real C99 bool but breaks with fake bool: // // bool baz = (flags & 0x100); // #ifdef HAVE_STDBOOL_H # include #else # if ! HAVE__BOOL typedef unsigned char _Bool; # endif # define bool _Bool # define false 0 # define true 1 # define __bool_true_false_are_defined 1 #endif -// string.h should be enough but let's include strings.h and memory.h too if -// they exists, since that shouldn't do any harm, but may improve portability. #include -#ifdef HAVE_STRINGS_H -# include -#endif - -#ifdef HAVE_MEMORY_H -# include -#endif - // As of MSVC 2013, inline and restrict are supported with // non-standard keywords. #if defined(_WIN32) && defined(_MSC_VER) # ifndef inline # define inline __inline # endif # ifndef restrict # define restrict __restrict # endif #endif //////////// // Macros // //////////// #undef memzero #define memzero(s, n) memset(s, 0, n) // NOTE: Avoid using MIN() and MAX(), because even conditionally defining // those macros can cause some portability trouble, since on some systems // the system headers insist defining their own versions. #define my_min(x, y) ((x) < (y) ? (x) : (y)) #define my_max(x, y) ((x) > (y) ? 
(x) : (y)) #ifndef ARRAY_SIZE # define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) #endif #if defined(__GNUC__) \ && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4) # define lzma_attr_alloc_size(x) __attribute__((__alloc_size__(x))) #else # define lzma_attr_alloc_size(x) #endif #endif diff --git a/src/common/tuklib_progname.c b/src/common/tuklib_progname.c index 7cb7e203dd9d..e2ef4e555f35 100644 --- a/src/common/tuklib_progname.c +++ b/src/common/tuklib_progname.c @@ -1,50 +1,50 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_progname.c /// \brief Program name to be displayed in messages // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "tuklib_progname.h" #include -#if !HAVE_DECL_PROGRAM_INVOCATION_NAME +#ifndef HAVE_PROGRAM_INVOCATION_NAME char *progname = NULL; #endif extern void tuklib_progname_init(char **argv) { #ifdef TUKLIB_DOSLIKE // On these systems, argv[0] always has the full path and .exe // suffix even if the user just types the plain program name. // We modify argv[0] to make it nicer to read. // Strip the leading path. char *p = argv[0] + strlen(argv[0]); while (argv[0] < p && p[-1] != '/' && p[-1] != '\\') --p; argv[0] = p; // Strip the .exe suffix. p = strrchr(p, '.'); if (p != NULL) *p = '\0'; // Make it lowercase. 
for (p = argv[0]; *p != '\0'; ++p) if (*p >= 'A' && *p <= 'Z') *p = *p - 'A' + 'a'; #endif progname = argv[0]; return; } diff --git a/src/common/tuklib_progname.h b/src/common/tuklib_progname.h index 791b12517e59..bb80f25e0381 100644 --- a/src/common/tuklib_progname.h +++ b/src/common/tuklib_progname.h @@ -1,32 +1,32 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_progname.h /// \brief Program name to be displayed in messages // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef TUKLIB_PROGNAME_H #define TUKLIB_PROGNAME_H #include "tuklib_common.h" #include TUKLIB_DECLS_BEGIN -#if HAVE_DECL_PROGRAM_INVOCATION_NAME +#ifdef HAVE_PROGRAM_INVOCATION_NAME # define progname program_invocation_name #else # define progname TUKLIB_SYMBOL(tuklib_progname) extern char *progname; #endif #define tuklib_progname_init TUKLIB_SYMBOL(tuklib_progname_init) extern void tuklib_progname_init(char **argv); TUKLIB_DECLS_END #endif diff --git a/src/liblzma/api/lzma/version.h b/src/liblzma/api/lzma/version.h index 7d37130f240f..0dd90fc872a9 100644 --- a/src/liblzma/api/lzma/version.h +++ b/src/liblzma/api/lzma/version.h @@ -1,121 +1,121 @@ /** * \file lzma/version.h * \brief Version number */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. * * See ../lzma.h for information about liblzma as a whole. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. 
#endif /* * Version number split into components */ #define LZMA_VERSION_MAJOR 5 #define LZMA_VERSION_MINOR 4 -#define LZMA_VERSION_PATCH 0 +#define LZMA_VERSION_PATCH 1 #define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE #ifndef LZMA_VERSION_COMMIT # define LZMA_VERSION_COMMIT "" #endif /* * Map symbolic stability levels to integers. */ #define LZMA_VERSION_STABILITY_ALPHA 0 #define LZMA_VERSION_STABILITY_BETA 1 #define LZMA_VERSION_STABILITY_STABLE 2 /** * \brief Compile-time version number * * The version number is of format xyyyzzzs where * - x = major * - yyy = minor * - zzz = revision * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable * * The same xyyyzzz triplet is never reused with different stability levels. * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta * or 5.1.0 stable. * * \note The version number of liblzma has nothing to with * the version number of Igor Pavlov's LZMA SDK. */ #define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + LZMA_VERSION_MINOR * UINT32_C(10000) \ + LZMA_VERSION_PATCH * UINT32_C(10) \ + LZMA_VERSION_STABILITY) /* * Macros to construct the compile-time version string */ #if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA # define LZMA_VERSION_STABILITY_STRING "alpha" #elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA # define LZMA_VERSION_STABILITY_STRING "beta" #elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE # define LZMA_VERSION_STABILITY_STRING "" #else # error Incorrect LZMA_VERSION_STABILITY #endif #define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ #major "." #minor "." #patch stability commit #define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) /** * \brief Compile-time version as a string * * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable * versions don't have any "stable" suffix). 
In future, a snapshot built * from source code repository may include an additional suffix, for example * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form * in LZMA_VERSION macro. */ #define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ LZMA_VERSION_COMMIT) /* #ifndef is needed for use with windres (MinGW or Cygwin). */ #ifndef LZMA_H_INTERNAL_RC /** * \brief Run-time version number as an integer * * Return the value of LZMA_VERSION macro at the compile time of liblzma. * This allows the application to compare if it was built against the same, * older, or newer version of liblzma that is currently running. */ extern LZMA_API(uint32_t) lzma_version_number(void) lzma_nothrow lzma_attr_const; /** * \brief Run-time version as a string * * This function may be useful if you want to display which version of * liblzma your application is currently using. */ extern LZMA_API(const char *) lzma_version_string(void) lzma_nothrow lzma_attr_const; #endif diff --git a/src/liblzma/check/crc64_fast.c b/src/liblzma/check/crc64_fast.c index e3cbf1b1e91d..db44633bfaf7 100644 --- a/src/liblzma/check/crc64_fast.c +++ b/src/liblzma/check/crc64_fast.c @@ -1,512 +1,530 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file crc64.c /// \brief CRC64 calculation /// /// There are two methods in this file. crc64_generic uses the /// the slice-by-four algorithm. This is the same idea that is /// used in crc32_fast.c, but for CRC64 we use only four tables /// instead of eight to avoid increasing CPU cache usage. /// /// crc64_clmul uses 32/64-bit x86 SSSE3, SSE4.1, and CLMUL instructions. 
/// It was derived from /// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf /// and the public domain code from https://github.com/rawrunprotected/crc /// (URLs were checked on 2022-11-07). /// /// FIXME: Builds for 32-bit x86 use crc64_x86.S by default instead /// of this file and thus CLMUL version isn't available on 32-bit x86 /// unless configured with --disable-assembler. Even then the lookup table /// isn't omitted in crc64_table.c since it doesn't know that assembly /// code has been disabled. // // Authors: Lasse Collin // Ilya Kurdyukov // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "check.h" #undef CRC_GENERIC #undef CRC_CLMUL #undef CRC_USE_GENERIC_FOR_SMALL_INPUTS // If CLMUL cannot be used then only the generic slice-by-four is built. #if !defined(HAVE_USABLE_CLMUL) # define CRC_GENERIC 1 // If CLMUL is allowed unconditionally in the compiler options then the // generic version can be omitted. Note that this doesn't work with MSVC // as I don't know how to detect the features here. // // NOTE: Keep this in sync with crc64_table.c. #elif (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \ || (defined(__e2k__) && __iset__ >= 6) # define CRC_CLMUL 1 // Otherwise build both and detect at runtime which version to use. #else # define CRC_GENERIC 1 # define CRC_CLMUL 1 /* // The generic code is much faster with 1-8-byte inputs and has // similar performance up to 16 bytes at least in microbenchmarks // (it depends on input buffer alignment too). If both versions are // built, this #define will use the generic version for inputs up to // 16 bytes and CLMUL for bigger inputs. It saves a little in code // size since the special cases for 0-16-byte inputs will be omitted // from the CLMUL code.
# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1 */ # if defined(_MSC_VER) # include # elif defined(HAVE_CPUID_H) # include # endif #endif ///////////////////////////////// // Generic slice-by-four CRC64 // ///////////////////////////////// #ifdef CRC_GENERIC #include "crc_macros.h" #ifdef WORDS_BIGENDIAN # define A1(x) ((x) >> 56) #else # define A1 A #endif // See the comments in crc32_fast.c. They aren't duplicated here. static uint64_t crc64_generic(const uint8_t *buf, size_t size, uint64_t crc) { crc = ~crc; #ifdef WORDS_BIGENDIAN crc = bswap64(crc); #endif if (size > 4) { while ((uintptr_t)(buf) & 3) { crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); --size; } const uint8_t *const limit = buf + (size & ~(size_t)(3)); size &= (size_t)(3); while (buf < limit) { #ifdef WORDS_BIGENDIAN const uint32_t tmp = (uint32_t)(crc >> 32) ^ aligned_read32ne(buf); #else const uint32_t tmp = (uint32_t)crc ^ aligned_read32ne(buf); #endif buf += 4; crc = lzma_crc64_table[3][A(tmp)] ^ lzma_crc64_table[2][B(tmp)] ^ S32(crc) ^ lzma_crc64_table[1][C(tmp)] ^ lzma_crc64_table[0][D(tmp)]; } } while (size-- != 0) crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); #ifdef WORDS_BIGENDIAN crc = bswap64(crc); #endif return ~crc; } #endif ///////////////////// // x86 CLMUL CRC64 // ///////////////////// #ifdef CRC_CLMUL #include /* // These functions were used to generate the constants // at the top of crc64_clmul(). static uint64_t calc_lo(uint64_t poly) { uint64_t a = poly; uint64_t b = 0; for (unsigned i = 0; i < 64; ++i) { b = (b >> 1) | (a << 63); a = (a >> 1) ^ (a & 1 ? poly : 0); } return b; } static uint64_t calc_hi(uint64_t poly, uint64_t a) { for (unsigned i = 0; i < 64; ++i) a = (a >> 1) ^ (a & 1 ? 
poly : 0); return a; } */ #define MASK_L(in, mask, r) \ r = _mm_shuffle_epi8(in, mask) #define MASK_H(in, mask, r) \ r = _mm_shuffle_epi8(in, _mm_xor_si128(mask, vsign)) #define MASK_LH(in, mask, low, high) \ MASK_L(in, mask, low); \ MASK_H(in, mask, high) +// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC +// code when optimizations are enabled (release build). According to the bug +// report, the ebx register is corrupted and the calculated result is wrong. +// Trying to workaround the problem with "__asm mov ebx, ebx" didn't help. +// The following pragma works and performance is still good. x86-64 builds +// aren't affected by this problem. +// +// NOTE: Another pragma after the function restores the optimizations. +// If the #if condition here is updated, the other one must be updated too. +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \ + && defined(_M_IX86) +# pragma optimize("g", off) +#endif + // EDG-based compilers (Intel's classic compiler and compiler for E2K) can // define __GNUC__ but the attribute must not be used with them. // The new Clang-based ICX needs the attribute. // // NOTE: Build systems check for this too, keep them in sync with this. #if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) __attribute__((__target__("ssse3,sse4.1,pclmul"))) #endif static uint64_t crc64_clmul(const uint8_t *buf, size_t size, uint64_t crc) { // The prototypes of the intrinsics use signed types while most of // the values are treated as unsigned here. These warnings in this // function have been checked and found to be harmless so silence them. #if TUKLIB_GNUC_REQ(4, 6) || defined(__clang__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wsign-conversion" # pragma GCC diagnostic ignored "-Wconversion" #endif #ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS // The code assumes that there is at least one byte of input. 
if (size == 0) return crc; #endif // const uint64_t poly = 0xc96c5795d7870f42; // CRC polynomial const uint64_t p = 0x92d8af2baf0e1e85; // (poly << 1) | 1 const uint64_t mu = 0x9c3e466c172963d5; // (calc_lo(poly) << 1) | 1 const uint64_t k2 = 0xdabe95afc7875f40; // calc_hi(poly, 1) const uint64_t k1 = 0xe05dd497ca393ae4; // calc_hi(poly, k2) const __m128i vfold0 = _mm_set_epi64x(p, mu); const __m128i vfold1 = _mm_set_epi64x(k2, k1); // Create a vector with 8-bit values 0 to 15. This is used to // construct control masks for _mm_blendv_epi8 and _mm_shuffle_epi8. const __m128i vramp = _mm_setr_epi32( 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c); // This is used to inverse the control mask of _mm_shuffle_epi8 // so that bytes that wouldn't be picked with the original mask // will be picked and vice versa. const __m128i vsign = _mm_set1_epi8(0x80); // Memory addresses A to D and the distances between them: // // A B C D // [skip_start][size][skip_end] // [ size2 ] // // A and D are 16-byte aligned. B and C are 1-byte aligned. // skip_start and skip_end are 0-15 bytes. size is at least 1 byte. // // A = aligned_buf will initially point to this address. // B = The address pointed by the caller-supplied buf. // C = buf + size == aligned_buf + size2 // D = buf + size + skip_end == aligned_buf + size2 + skip_end const size_t skip_start = (size_t)((uintptr_t)buf & 15); const size_t skip_end = (size_t)(-(uintptr_t)(buf + size) & 15); const __m128i *aligned_buf = (const __m128i *)( (uintptr_t)buf & ~(uintptr_t)15); // If size2 <= 16 then the whole input fits into a single 16-byte // vector. If size2 > 16 then at least two 16-byte vectors must // be processed. If size2 > 16 && size <= 16 then there is only // one 16-byte vector's worth of input but it is unaligned in memory. // // NOTE: There is no integer overflow here if the arguments are valid. // If this overflowed, buf + size would too. 
size_t size2 = skip_start + size; // Masks to be used with _mm_blendv_epi8 and _mm_shuffle_epi8: // The first skip_start or skip_end bytes in the vectors will have // the high bit (0x80) set. _mm_blendv_epi8 and _mm_shuffle_epi8 // will produce zeros for these positions. (Bitwise-xor of these // masks with vsign will produce the opposite behavior.) const __m128i mask_start = _mm_sub_epi8(vramp, _mm_set1_epi8(skip_start)); const __m128i mask_end = _mm_sub_epi8(vramp, _mm_set1_epi8(skip_end)); // Get the first 1-16 bytes into data0. If loading less than 16 bytes, // the bytes are loaded to the high bits of the vector and the least // significant positions are filled with zeros. const __m128i data0 = _mm_blendv_epi8(_mm_load_si128(aligned_buf), _mm_setzero_si128(), mask_start); ++aligned_buf; #if defined(__i386__) || defined(_M_IX86) const __m128i initial_crc = _mm_set_epi64x(0, ~crc); #else // GCC and Clang would produce good code with _mm_set_epi64x // but MSVC needs _mm_cvtsi64_si128 on x86-64. const __m128i initial_crc = _mm_cvtsi64_si128(~crc); #endif __m128i v0, v1, v2, v3; #ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS if (size <= 16) { // Right-shift initial_crc by 1-16 bytes based on "size" // and store the result in v1 (high bytes) and v0 (low bytes). // // NOTE: The highest 8 bytes of initial_crc are zeros so // v1 will be filled with zeros if size >= 8. The highest 8 // bytes of v1 will always become zeros. // // [ v1 ][ v0 ] // [ initial_crc ] size == 1 // [ initial_crc ] size == 2 // [ initial_crc ] size == 15 // [ initial_crc ] size == 16 (all in v0) const __m128i mask_low = _mm_add_epi8( vramp, _mm_set1_epi8(size - 16)); MASK_LH(initial_crc, mask_low, v0, v1); if (size2 <= 16) { // There are 1-16 bytes of input and it is all // in data0. Copy the input bytes to v3. If there // are fewer than 16 bytes, the low bytes in v3 // will be filled with zeros. That is, the input // bytes are stored to the same position as // (part of) initial_crc is in v0. 
MASK_L(data0, mask_end, v3); } else { // There are 2-16 bytes of input but not all bytes // are in data0. const __m128i data1 = _mm_load_si128(aligned_buf); // Collect the 2-16 input bytes from data0 and data1 // to v2 and v3, and bitwise-xor them with the // low bits of initial_crc in v0. Note that the // the second xor is below this else-block as it // is shared with the other branch. MASK_H(data0, mask_end, v2); MASK_L(data1, mask_end, v3); v0 = _mm_xor_si128(v0, v2); } v0 = _mm_xor_si128(v0, v3); v1 = _mm_alignr_epi8(v1, v0, 8); } else #endif { const __m128i data1 = _mm_load_si128(aligned_buf); MASK_LH(initial_crc, mask_start, v0, v1); v0 = _mm_xor_si128(v0, data0); v1 = _mm_xor_si128(v1, data1); #define FOLD \ v1 = _mm_xor_si128(v1, _mm_clmulepi64_si128(v0, vfold1, 0x00)); \ v0 = _mm_xor_si128(v1, _mm_clmulepi64_si128(v0, vfold1, 0x11)); while (size2 > 32) { ++aligned_buf; size2 -= 16; FOLD v1 = _mm_load_si128(aligned_buf); } if (size2 < 32) { MASK_H(v0, mask_end, v2); MASK_L(v0, mask_end, v0); MASK_L(v1, mask_end, v3); v1 = _mm_or_si128(v2, v3); } FOLD v1 = _mm_srli_si128(v0, 8); #undef FOLD } v1 = _mm_xor_si128(_mm_clmulepi64_si128(v0, vfold1, 0x10), v1); v0 = _mm_clmulepi64_si128(v1, vfold0, 0x00); v2 = _mm_clmulepi64_si128(v0, vfold0, 0x10); v0 = _mm_xor_si128(_mm_xor_si128(v2, _mm_slli_si128(v0, 8)), v1); #if defined(__i386__) || defined(_M_IX86) return ~(((uint64_t)(uint32_t)_mm_extract_epi32(v0, 3) << 32) | (uint64_t)(uint32_t)_mm_extract_epi32(v0, 2)); #else return ~(uint64_t)_mm_extract_epi64(v0, 1); #endif #if TUKLIB_GNUC_REQ(4, 6) || defined(__clang__) # pragma GCC diagnostic pop #endif } +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \ + && defined(_M_IX86) +# pragma optimize("", on) +#endif #endif //////////////////////// // Detect CPU support // //////////////////////// #if defined(CRC_GENERIC) && defined(CRC_CLMUL) static inline bool is_clmul_supported(void) { int success = 1; uint32_t r[4]; // eax, ebx, ecx, edx #if 
defined(_MSC_VER) // This needs <intrin.h> with MSVC. ICC has it as a built-in // on all platforms. __cpuid(r, 1); #elif defined(HAVE_CPUID_H) // Compared to just using __asm__ to run CPUID, this also checks // that CPUID is supported and saves and restores ebx as that is // needed with GCC < 5 with position-independent code (PIC). success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]); #else // Just a fallback that shouldn't be needed. __asm__("cpuid\n\t" : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3]) : "a"(1), "c"(0)); #endif // Returns true if these are supported: // CLMUL (bit 1 in ecx) // SSSE3 (bit 9 in ecx) // SSE4.1 (bit 19 in ecx) const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19); return success && (r[2] & ecx_mask) == ecx_mask; // Alternative methods that weren't used: // - ICC's _may_i_use_cpu_feature: the other methods should work too. // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul") // // CPUID decoding is needed with MSVC anyway and older GCC. This keeps // the feature checks in the build system simpler too. The nice thing // about __builtin_cpu_supports would be that it generates very short // code as it only reads a variable set at startup but a few bytes // don't matter here. } #ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR # define CRC64_FUNC_INIT # define CRC64_SET_FUNC_ATTR __attribute__((__constructor__)) #else # define CRC64_FUNC_INIT = &crc64_dispatch # define CRC64_SET_FUNC_ATTR static uint64_t crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc); #endif // Pointer to the selected CRC64 method. static uint64_t (*crc64_func)(const uint8_t *buf, size_t size, uint64_t crc) CRC64_FUNC_INIT; CRC64_SET_FUNC_ATTR static void crc64_set_func(void) { crc64_func = is_clmul_supported() ?
&crc64_clmul : &crc64_generic; return; } #ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR static uint64_t crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc) { // When __attribute__((__constructor__)) isn't supported, set the // function pointer without any locking. If multiple threads run // the detection code in parallel, they will all end up setting // the pointer to the same value. This avoids the use of // mythread_once() on every call to lzma_crc64() but this likely // isn't strictly standards compliant. Let's change it if it breaks. crc64_set_func(); return crc64_func(buf, size, crc); } #endif #endif extern LZMA_API(uint64_t) lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) { #if defined(CRC_GENERIC) && defined(CRC_CLMUL) // If CLMUL is available, it is the best for non-tiny inputs, // being over twice as fast as the generic slice-by-four version. // However, for size <= 16 it's different. In the extreme case // of size == 1 the generic version can be five times faster. // At size >= 8 the CLMUL starts to become reasonable. It // varies depending on the alignment of buf too. // // The above doesn't include the overhead of mythread_once(). // At least on x86-64 GNU/Linux, pthread_once() is very fast but // it still makes lzma_crc64(buf, 1, crc) 50-100 % slower. When // size reaches 12-16 bytes the overhead becomes negligible. // // So using the generic version for size <= 16 may give better // performance with tiny inputs but if such inputs happen rarely // it's not so obvious because then the lookup table of the // generic version may not be in the processor cache. #ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS if (size <= 16) return crc64_generic(buf, size, crc); #endif /* #ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR // See crc64_dispatch(). This would be the alternative which uses // locking and doesn't use crc64_dispatch(). Note that on Windows // this method needs Vista threads. 
mythread_once(crc64_set_func); #endif */ return crc64_func(buf, size, crc); #elif defined(CRC_CLMUL) // If CLMUL is used unconditionally without runtime CPU detection // then omitting the generic version and its 8 KiB lookup table // makes the library smaller. // // FIXME: Lookup table isn't currently omitted on 32-bit x86, // see crc64_table.c. return crc64_clmul(buf, size, crc); #else return crc64_generic(buf, size, crc); #endif } diff --git a/src/liblzma/common/filter_encoder.c b/src/liblzma/common/filter_encoder.c index 978b7a6bb5b8..46fe8af1c153 100644 --- a/src/liblzma/common/filter_encoder.c +++ b/src/liblzma/common/filter_encoder.c @@ -1,305 +1,308 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file filter_encoder.c /// \brief Filter ID mapping to filter-specific functions // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "filter_encoder.h" #include "filter_common.h" #include "lzma_encoder.h" #include "lzma2_encoder.h" #include "simple_encoder.h" #include "delta_encoder.h" typedef struct { /// Filter ID lzma_vli id; /// Initializes the filter encoder and calls lzma_next_filter_init() /// for filters + 1. lzma_init_function init; /// Calculates memory usage of the encoder. If the options are /// invalid, UINT64_MAX is returned. uint64_t (*memusage)(const void *options); /// Calculates the recommended Uncompressed Size for .xz Blocks to /// which the input data can be split to make multithreaded /// encoding possible. If this is NULL, it is assumed that /// the encoder is fast enough with single thread. uint64_t (*block_size)(const void *options); /// Tells the size of the Filter Properties field. If options are - /// invalid, UINT32_MAX is returned. If this is NULL, props_size_fixed - /// is used.
+ /// invalid, LZMA_OPTIONS_ERROR is returned and size is set to + /// UINT32_MAX. lzma_ret (*props_size_get)(uint32_t *size, const void *options); + + /// Some filters will always have the same size Filter Properties + /// field. If props_size_get is NULL, this value is used. uint32_t props_size_fixed; /// Encodes Filter Properties. /// /// \return - LZMA_OK: Properties encoded successfully. /// - LZMA_OPTIONS_ERROR: Unsupported options /// - LZMA_PROG_ERROR: Invalid options or not enough /// output space lzma_ret (*props_encode)(const void *options, uint8_t *out); } lzma_filter_encoder; static const lzma_filter_encoder encoders[] = { #ifdef HAVE_ENCODER_LZMA1 { .id = LZMA_FILTER_LZMA1, .init = &lzma_lzma_encoder_init, .memusage = &lzma_lzma_encoder_memusage, .block_size = NULL, // Not needed for LZMA1 .props_size_get = NULL, .props_size_fixed = 5, .props_encode = &lzma_lzma_props_encode, }, { .id = LZMA_FILTER_LZMA1EXT, .init = &lzma_lzma_encoder_init, .memusage = &lzma_lzma_encoder_memusage, .block_size = NULL, // Not needed for LZMA1 .props_size_get = NULL, .props_size_fixed = 5, .props_encode = &lzma_lzma_props_encode, }, #endif #ifdef HAVE_ENCODER_LZMA2 { .id = LZMA_FILTER_LZMA2, .init = &lzma_lzma2_encoder_init, .memusage = &lzma_lzma2_encoder_memusage, .block_size = &lzma_lzma2_block_size, .props_size_get = NULL, .props_size_fixed = 1, .props_encode = &lzma_lzma2_props_encode, }, #endif #ifdef HAVE_ENCODER_X86 { .id = LZMA_FILTER_X86, .init = &lzma_simple_x86_encoder_init, .memusage = NULL, .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, #endif #ifdef HAVE_ENCODER_POWERPC { .id = LZMA_FILTER_POWERPC, .init = &lzma_simple_powerpc_encoder_init, .memusage = NULL, .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, #endif #ifdef HAVE_ENCODER_IA64 { .id = LZMA_FILTER_IA64, .init = &lzma_simple_ia64_encoder_init, .memusage = NULL, .block_size = 
NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, #endif #ifdef HAVE_ENCODER_ARM { .id = LZMA_FILTER_ARM, .init = &lzma_simple_arm_encoder_init, .memusage = NULL, .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, #endif #ifdef HAVE_ENCODER_ARMTHUMB { .id = LZMA_FILTER_ARMTHUMB, .init = &lzma_simple_armthumb_encoder_init, .memusage = NULL, .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, #endif #ifdef HAVE_ENCODER_ARM64 { .id = LZMA_FILTER_ARM64, .init = &lzma_simple_arm64_encoder_init, .memusage = NULL, .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, #endif #ifdef HAVE_ENCODER_SPARC { .id = LZMA_FILTER_SPARC, .init = &lzma_simple_sparc_encoder_init, .memusage = NULL, .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, #endif #ifdef HAVE_ENCODER_DELTA { .id = LZMA_FILTER_DELTA, .init = &lzma_delta_encoder_init, .memusage = &lzma_delta_coder_memusage, .block_size = NULL, .props_size_get = NULL, .props_size_fixed = 1, .props_encode = &lzma_delta_props_encode, }, #endif }; static const lzma_filter_encoder * encoder_find(lzma_vli id) { for (size_t i = 0; i < ARRAY_SIZE(encoders); ++i) if (encoders[i].id == id) return encoders + i; return NULL; } extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) { return encoder_find(id) != NULL; } extern LZMA_API(lzma_ret) lzma_filters_update(lzma_stream *strm, const lzma_filter *filters) { if (strm->internal->next.update == NULL) return LZMA_PROG_ERROR; // Validate the filter chain. if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) return LZMA_OPTIONS_ERROR; // The actual filter chain in the encoder is reversed. Some things // still want the normal order chain, so we provide both. 
size_t count = 1; while (filters[count].id != LZMA_VLI_UNKNOWN) ++count; lzma_filter reversed_filters[LZMA_FILTERS_MAX + 1]; for (size_t i = 0; i < count; ++i) reversed_filters[count - i - 1] = filters[i]; reversed_filters[count].id = LZMA_VLI_UNKNOWN; return strm->internal->next.update(strm->internal->next.coder, strm->allocator, filters, reversed_filters); } extern lzma_ret lzma_raw_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options) { return lzma_raw_coder_init(next, allocator, options, (lzma_filter_find)(&encoder_find), true); } extern LZMA_API(lzma_ret) lzma_raw_encoder(lzma_stream *strm, const lzma_filter *options) { lzma_next_strm_init(lzma_raw_coder_init, strm, options, (lzma_filter_find)(&encoder_find), true); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) { return lzma_raw_coder_memusage( (lzma_filter_find)(&encoder_find), filters); } extern uint64_t lzma_mt_block_size(const lzma_filter *filters) { uint64_t max = 0; for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { const lzma_filter_encoder *const fe = encoder_find(filters[i].id); if (fe->block_size != NULL) { const uint64_t size = fe->block_size(filters[i].options); if (size == 0) return 0; if (size > max) max = size; } } return max; } extern LZMA_API(lzma_ret) lzma_properties_size(uint32_t *size, const lzma_filter *filter) { const lzma_filter_encoder *const fe = encoder_find(filter->id); if (fe == NULL) { // Unknown filter - if the Filter ID is a proper VLI, // return LZMA_OPTIONS_ERROR instead of LZMA_PROG_ERROR, // because it's possible that we just don't have support // compiled in for the requested filter. return filter->id <= LZMA_VLI_MAX ? 
LZMA_OPTIONS_ERROR : LZMA_PROG_ERROR; } if (fe->props_size_get == NULL) { // No props_size_get() function, use props_size_fixed. *size = fe->props_size_fixed; return LZMA_OK; } return fe->props_size_get(size, filter->options); } extern LZMA_API(lzma_ret) lzma_properties_encode(const lzma_filter *filter, uint8_t *props) { const lzma_filter_encoder *const fe = encoder_find(filter->id); if (fe == NULL) return LZMA_PROG_ERROR; if (fe->props_encode == NULL) return LZMA_OK; return fe->props_encode(filter->options, props); } diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index 24ec3c10c2e9..97cc9f953fae 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -1,1262 +1,1263 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index.c /// \brief Handling of .xz Indexes and some other Stream information // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// +#include "common.h" #include "index.h" #include "stream_flags_common.h" /// \brief How many Records to allocate at once /// /// This should be big enough to avoid making lots of tiny allocations /// but small enough to avoid too much unused memory at once. 
#define INDEX_GROUP_SIZE 512 /// \brief How many Records can be allocated at once at maximum #define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record)) /// \brief Base structure for index_stream and index_group structures typedef struct index_tree_node_s index_tree_node; struct index_tree_node_s { /// Uncompressed start offset of this Stream (relative to the /// beginning of the file) or Block (relative to the beginning /// of the Stream) lzma_vli uncompressed_base; /// Compressed start offset of this Stream or Block lzma_vli compressed_base; index_tree_node *parent; index_tree_node *left; index_tree_node *right; }; /// \brief AVL tree to hold index_stream or index_group structures typedef struct { /// Root node index_tree_node *root; /// Leftmost node. Since the tree will be filled sequentially, /// this won't change after the first node has been added to /// the tree. index_tree_node *leftmost; /// The rightmost node in the tree. Since the tree is filled /// sequentially, this is always the node where to add the new data. index_tree_node *rightmost; /// Number of nodes in the tree uint32_t count; } index_tree; typedef struct { lzma_vli uncompressed_sum; lzma_vli unpadded_sum; } index_record; typedef struct { /// Every Record group is part of index_stream.groups tree. index_tree_node node; /// Number of Blocks in this Stream before this group. lzma_vli number_base; /// Number of Records that can be put in records[]. size_t allocated; /// Index of the last Record in use. size_t last; /// The sizes in this array are stored as cumulative sums relative /// to the beginning of the Stream. This makes it possible to /// use binary search in lzma_index_locate(). /// /// Note that the cumulative summing is done specially for /// unpadded_sum: The previous value is rounded up to the next /// multiple of four before adding the Unpadded Size of the new /// Block. 
The total encoded size of the Blocks in the Stream /// is records[last].unpadded_sum in the last Record group of /// the Stream. /// /// For example, if the Unpadded Sizes are 39, 57, and 81, the /// stored values are 39, 97 (40 + 57), and 181 (100 + 81). /// The total encoded size of these Blocks is 184. /// /// This is a flexible array, because it makes it easy to optimize /// memory usage in case someone concatenates many Streams that /// have only one or few Blocks. index_record records[]; } index_group; typedef struct { /// Every index_stream is a node in the tree of Streams. index_tree_node node; /// Number of this Stream (first one is 1) uint32_t number; /// Total number of Blocks before this Stream lzma_vli block_number_base; /// Record groups of this Stream are stored in a tree. /// It's a T-tree with AVL-tree balancing. There are /// INDEX_GROUP_SIZE Records per node by default. /// This keeps the number of memory allocations reasonable /// and finding a Record is fast. index_tree groups; /// Number of Records in this Stream lzma_vli record_count; /// Size of the List of Records field in this Stream. This is used /// together with record_count to calculate the size of the Index /// field and thus the total size of the Stream. lzma_vli index_list_size; /// Stream Flags of this Stream. This is meaningful only if /// the Stream Flags have been told to us with lzma_index_stream_flags(). /// Initially stream_flags.version is set to UINT32_MAX to indicate /// that the Stream Flags are unknown. lzma_stream_flags stream_flags; /// Amount of Stream Padding after this Stream. This defaults to /// zero and can be set with lzma_index_stream_padding(). lzma_vli stream_padding; } index_stream; struct lzma_index_s { /// AVL-tree containing the Stream(s). Often there is just one /// Stream, but using a tree keeps lookups fast even when there /// are many concatenated Streams.
index_tree streams; /// Uncompressed size of all the Blocks in the Stream(s) lzma_vli uncompressed_size; /// Total size of all the Blocks in the Stream(s) lzma_vli total_size; /// Total number of Records in all Streams in this lzma_index lzma_vli record_count; /// Size of the List of Records field if all the Streams in this /// lzma_index were packed into a single Stream (makes it simpler to /// take many .xz files and combine them into a single Stream). /// /// This value together with record_count is needed to calculate /// Backward Size that is stored into Stream Footer. lzma_vli index_list_size; /// How many Records to allocate at once in lzma_index_append(). /// This defaults to INDEX_GROUP_SIZE but can be overridden with /// lzma_index_prealloc(). size_t prealloc; /// Bitmask indicating what integrity check types have been used /// as set by lzma_index_stream_flags(). The bit of the last Stream /// is not included here, since it is possible to change it by /// calling lzma_index_stream_flags() again. uint32_t checks; }; static void index_tree_init(index_tree *tree) { tree->root = NULL; tree->leftmost = NULL; tree->rightmost = NULL; tree->count = 0; return; } /// Helper for index_tree_end() static void index_tree_node_end(index_tree_node *node, const lzma_allocator *allocator, void (*free_func)(void *node, const lzma_allocator *allocator)) { // The tree won't ever be very huge, so recursion should be fine. // 20 levels in the tree is likely quite a lot already in practice. if (node->left != NULL) index_tree_node_end(node->left, allocator, free_func); if (node->right != NULL) index_tree_node_end(node->right, allocator, free_func); free_func(node, allocator); return; } /// Free the memory allocated for a tree. Each node is freed using the /// given free_func which is either &lzma_free or &index_stream_end. /// The latter is used to free the Record groups from each index_stream /// before freeing the index_stream itself. 
static void
index_tree_end(index_tree *tree, const lzma_allocator *allocator,
		void (*free_func)(void *node, const lzma_allocator *allocator))
{
	// Every node is released through free_func, so it must be provided.
	assert(free_func != NULL);

	// An empty tree has no nodes to release. Note that the members of
	// *tree itself are left untouched here; only the nodes are freed.
	if (tree->root != NULL)
		index_tree_node_end(tree->root, allocator, free_func);

	return;
}


/// Add a new node to the tree. node->uncompressed_base and
/// node->compressed_base must have been set by the caller already.
///
/// The node is always attached as the right child of the current rightmost
/// node. After that at most one left rotation is done to rebalance the tree.
static void
index_tree_append(index_tree *tree, index_tree_node *node)
{
	// The new node starts as a leaf whose parent is the current
	// rightmost node (tree->rightmost is NULL while the tree is empty).
	node->parent = tree->rightmost;
	node->left = NULL;
	node->right = NULL;

	++tree->count;

	// Handle the special case of adding the first node.
	if (tree->root == NULL) {
		tree->root = node;
		tree->leftmost = node;
		tree->rightmost = node;
		return;
	}

	// The tree is always filled sequentially.
	assert(tree->rightmost->uncompressed_base <= node->uncompressed_base);
	assert(tree->rightmost->compressed_base < node->compressed_base);

	// Add the new node after the rightmost node. It's the correct
	// place due to the reason above.
	tree->rightmost->right = node;
	tree->rightmost = node;

	// Balance the AVL-tree if needed. We don't need to keep the balance
	// factors in nodes, because we always fill the tree sequentially,
	// and thus know the state of the tree just by looking at the node
	// count. From the node count we can calculate how many steps to go
	// up in the tree to find the rotation root.
	//
	// NOTE(review): presumably bsr32() returns the index of the highest
	// set bit, which would make "up" zero exactly when the node count is
	// a power of two (no rotation in that case) - confirm against the
	// definition of bsr32().
	uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count));
	if (up != 0) {
		// Locate the root node for the rotation.
		// NOTE(review): presumably ctz32() counts the trailing zero
		// bits of its argument - confirm. The loop below then walks
		// that many + 2 parent links up from the new node.
		up = ctz32(tree->count) + 2;
		do {
			node = node->parent;
		} while (--up > 0);

		// Rotate left using node as the rotation root.
		index_tree_node *pivot = node->right;

		// Put pivot into the place where node used to hang: either
		// as the root of the whole tree or as the right child of
		// node's parent.
		if (node->parent == NULL) {
			tree->root = pivot;
		} else {
			assert(node->parent->right == node);
			node->parent->right = pivot;
		}

		pivot->parent = node->parent;

		// The former left subtree of pivot (if any) becomes
		// the right subtree of node.
		node->right = pivot->left;
		if (node->right != NULL)
			node->right->parent = node;

		// Finally node itself becomes the left child of pivot.
		pivot->left = node;
		node->parent = pivot;
	}

	return;
}


/// Get the next node in the tree. Return NULL if there are no more nodes.
static void * index_tree_next(const index_tree_node *node) { if (node->right != NULL) { node = node->right; while (node->left != NULL) node = node->left; return (void *)(node); } while (node->parent != NULL && node->parent->right == node) node = node->parent; return (void *)(node->parent); } /// Locate a node that contains the given uncompressed offset. It is /// caller's job to check that target is not bigger than the uncompressed /// size of the tree (the last node would be returned in that case still). static void * index_tree_locate(const index_tree *tree, lzma_vli target) { const index_tree_node *result = NULL; const index_tree_node *node = tree->root; assert(tree->leftmost == NULL || tree->leftmost->uncompressed_base == 0); // Consecutive nodes may have the same uncompressed_base. // We must pick the rightmost one. while (node != NULL) { if (node->uncompressed_base > target) { node = node->left; } else { result = node; node = node->right; } } return (void *)(result); } /// Allocate and initialize a new Stream using the given base offsets. static index_stream * index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, uint32_t stream_number, lzma_vli block_number_base, const lzma_allocator *allocator) { index_stream *s = lzma_alloc(sizeof(index_stream), allocator); if (s == NULL) return NULL; s->node.uncompressed_base = uncompressed_base; s->node.compressed_base = compressed_base; s->node.parent = NULL; s->node.left = NULL; s->node.right = NULL; s->number = stream_number; s->block_number_base = block_number_base; index_tree_init(&s->groups); s->record_count = 0; s->index_list_size = 0; s->stream_flags.version = UINT32_MAX; s->stream_padding = 0; return s; } /// Free the memory allocated for a Stream and its Record groups. 
static void
index_stream_end(void *node, const lzma_allocator *allocator)
{
	// The parameter is void * so that this function can be passed
	// as free_func to index_tree_end().
	index_stream *s = node;
	index_tree_end(&s->groups, allocator, &lzma_free);
	lzma_free(s, allocator);
	return;
}


/// Allocate a lzma_index whose totals are zero and whose Stream tree is
/// empty. Returns NULL if memory allocation fails.
static lzma_index *
index_init_plain(const lzma_allocator *allocator)
{
	lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
	if (i != NULL) {
		index_tree_init(&i->streams);
		i->uncompressed_size = 0;
		i->total_size = 0;
		i->record_count = 0;
		i->index_list_size = 0;
		i->prealloc = INDEX_GROUP_SIZE;
		i->checks = 0;
	}

	return i;
}


/// Allocate a new lzma_index that contains one empty Stream.
/// Returns NULL if memory allocation fails.
extern LZMA_API(lzma_index *)
lzma_index_init(const lzma_allocator *allocator)
{
	lzma_index *i = index_init_plain(allocator);
	if (i == NULL)
		return NULL;

	// The first Stream: base offsets zero, Stream number one.
	index_stream *s = index_stream_init(0, 0, 1, 0, allocator);
	if (s == NULL) {
		lzma_free(i, allocator);
		return NULL;
	}

	index_tree_append(&i->streams, &s->node);

	return i;
}


/// Free the lzma_index and all the Streams and Record groups in it.
/// Passing NULL as i is allowed and does nothing.
extern LZMA_API(void)
lzma_index_end(lzma_index *i, const lzma_allocator *allocator)
{
	// NOTE: If you modify this function, check also the bottom
	// of lzma_index_cat().
	if (i != NULL) {
		index_tree_end(&i->streams, allocator, &index_stream_end);
		lzma_free(i, allocator);
	}

	return;
}


/// Set how many Records the next group allocation in lzma_index_append()
/// has room for. The value is clamped to PREALLOC_MAX.
extern void
lzma_index_prealloc(lzma_index *i, lzma_vli records)
{
	if (records > PREALLOC_MAX)
		records = PREALLOC_MAX;

	i->prealloc = (size_t)(records);
	return;
}


/// Estimate the memory usage of a lzma_index with the given number of
/// Streams and Blocks. Returns UINT64_MAX if the arguments are invalid
/// or the calculation would overflow.
extern LZMA_API(uint64_t)
lzma_index_memusage(lzma_vli streams, lzma_vli blocks)
{
	// This calculates an upper bound that is only a little bit
	// bigger than the exact maximum memory usage with the given
	// parameters.

	// Typical malloc() overhead is 2 * sizeof(void *) but we take
	// a little bit extra just in case. Using LZMA_MEMUSAGE_BASE
	// instead would give too inaccurate estimate.
	const size_t alloc_overhead = 4 * sizeof(void *);

	// Amount of memory needed for each Stream base structures.
	// We assume that every Stream has at least one Block and
	// thus at least one group.
	const size_t stream_base = sizeof(index_stream)
			+ sizeof(index_group) + 2 * alloc_overhead;

	// Amount of memory needed per group.
	const size_t group_base = sizeof(index_group)
			+ INDEX_GROUP_SIZE * sizeof(index_record)
			+ alloc_overhead;

	// Number of groups. There may actually be more, but that overhead
	// has been taken into account in stream_base already.
	const lzma_vli groups
			= (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE;

	// Memory used by index_stream and index_group structures.
	// These products are validated below before they are used.
	const uint64_t streams_mem = streams * stream_base;
	const uint64_t groups_mem = groups * group_base;

	// Memory used by the base structure.
	const uint64_t index_base = sizeof(lzma_index) + alloc_overhead;

	// Validate the arguments and catch integer overflows.
	// Maximum number of Streams is "only" UINT32_MAX, because
	// that limit is used by the tree containing the Streams.
	const uint64_t limit = UINT64_MAX - index_base;
	if (streams == 0 || streams > UINT32_MAX || blocks > LZMA_VLI_MAX
			|| streams > limit / stream_base
			|| groups > limit / group_base
			|| limit - streams_mem < groups_mem)
		return UINT64_MAX;

	return index_base + streams_mem + groups_mem;
}


/// Estimate the memory usage of the given lzma_index using its current
/// Stream and Record counts.
extern LZMA_API(uint64_t)
lzma_index_memused(const lzma_index *i)
{
	return lzma_index_memusage(i->streams.count, i->record_count);
}


/// Get the total number of Records (Blocks) in the lzma_index.
extern LZMA_API(lzma_vli)
lzma_index_block_count(const lzma_index *i)
{
	return i->record_count;
}


/// Get the number of Streams in the lzma_index.
extern LZMA_API(lzma_vli)
lzma_index_stream_count(const lzma_index *i)
{
	return i->streams.count;
}


/// Get the size of the Index field (including Index Padding) as if all
/// the Records were stored in a single Index.
extern LZMA_API(lzma_vli)
lzma_index_size(const lzma_index *i)
{
	return index_size(i->record_count, i->index_list_size);
}


/// Get the total size of all the Blocks.
extern LZMA_API(lzma_vli)
lzma_index_total_size(const lzma_index *i)
{
	return i->total_size;
}


/// Get the size of a Stream that would hold all the Blocks and a single
/// Index covering all the Records.
extern LZMA_API(lzma_vli)
lzma_index_stream_size(const lzma_index *i)
{
	// Stream Header + Blocks + Index + Stream Footer
	return LZMA_STREAM_HEADER_SIZE + i->total_size
			+ index_size(i->record_count, i->index_list_size)
			+ LZMA_STREAM_HEADER_SIZE;
}


/// Calculate the file size up to the end of a Stream, Stream Padding
/// included. Returns LZMA_VLI_UNKNOWN if the result would exceed
/// LZMA_VLI_MAX.
static lzma_vli
index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum,
		lzma_vli record_count, lzma_vli index_list_size,
		lzma_vli stream_padding)
{
	// Earlier Streams and Stream Paddings + Stream Header
	// + Blocks + Index + Stream Footer + Stream Padding
	//
	// This might go over LZMA_VLI_MAX due to too big unpadded_sum
	// when this function is used in lzma_index_append().
	lzma_vli file_size = compressed_base + 2 * LZMA_STREAM_HEADER_SIZE
			+ stream_padding + vli_ceil4(unpadded_sum);
	if (file_size > LZMA_VLI_MAX)
		return LZMA_VLI_UNKNOWN;

	// The same applies here.
	file_size += index_size(record_count, index_list_size);
	if (file_size > LZMA_VLI_MAX)
		return LZMA_VLI_UNKNOWN;

	return file_size;
}


/// Get the file size up to the end of the last Stream in the lzma_index.
extern LZMA_API(lzma_vli)
lzma_index_file_size(const lzma_index *i)
{
	const index_stream *s = (const index_stream *)(i->streams.rightmost);
	const index_group *g = (const index_group *)(s->groups.rightmost);

	// g is NULL when the last Stream has no Records.
	return index_file_size(s->node.compressed_base,
			g == NULL ? 0 : g->records[g->last].unpadded_sum,
			s->record_count, s->index_list_size,
			s->stream_padding);
}


/// Get the uncompressed size of all the Blocks.
extern LZMA_API(lzma_vli)
lzma_index_uncompressed_size(const lzma_index *i)
{
	return i->uncompressed_size;
}


/// Get a bitmask of the integrity check types used in the Streams.
extern LZMA_API(uint32_t)
lzma_index_checks(const lzma_index *i)
{
	uint32_t checks = i->checks;

	// Get the type of the Check of the last Stream too.
	// The version is UINT32_MAX if the Stream Flags haven't been set.
	const index_stream *s = (const index_stream *)(i->streams.rightmost);
	if (s->stream_flags.version != UINT32_MAX)
		checks |= UINT32_C(1) << s->stream_flags.check;

	return checks;
}


/// Get the number of Index Padding bytes (0-3) needed to make the size
/// of the Index field a multiple of four.
extern uint32_t
lzma_index_padding_size(const lzma_index *i)
{
	return (LZMA_VLI_C(4) - index_size_unpadded(
			i->record_count, i->index_list_size)) & 3;
}


/// Set the Stream Flags of the last Stream in the lzma_index.
/// Returns LZMA_PROG_ERROR if either argument is NULL.
extern LZMA_API(lzma_ret)
lzma_index_stream_flags(lzma_index *i, const lzma_stream_flags *stream_flags)
{
	if (i == NULL || stream_flags == NULL)
		return LZMA_PROG_ERROR;

	// Validate the Stream Flags.
	// The structure is compared against itself to do this.
	return_if_error(lzma_stream_flags_compare(
			stream_flags, stream_flags));

	index_stream *s = (index_stream *)(i->streams.rightmost);
	s->stream_flags = *stream_flags;

	return LZMA_OK;
}


/// Set the amount of Stream Padding of the last Stream.
/// Returns LZMA_PROG_ERROR if the arguments are invalid (the padding
/// must be a multiple of four) and LZMA_DATA_ERROR if the file would
/// grow too big.
extern LZMA_API(lzma_ret)
lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding)
{
	if (i == NULL || stream_padding > LZMA_VLI_MAX
			|| (stream_padding & 3) != 0)
		return LZMA_PROG_ERROR;

	index_stream *s = (index_stream *)(i->streams.rightmost);

	// Check that the new value won't make the file grow too big.
	// The old padding is zeroed temporarily so that the file size
	// is calculated without it, and restored if the check fails.
	const lzma_vli old_stream_padding = s->stream_padding;
	s->stream_padding = 0;
	if (lzma_index_file_size(i) + stream_padding > LZMA_VLI_MAX) {
		s->stream_padding = old_stream_padding;
		return LZMA_DATA_ERROR;
	}

	s->stream_padding = stream_padding;
	return LZMA_OK;
}


/// Add a new Record to the last Stream of the lzma_index.
///
/// Returns LZMA_PROG_ERROR on invalid arguments, LZMA_DATA_ERROR if a
/// size limit would be exceeded, and LZMA_MEM_ERROR if a new group
/// cannot be allocated. The lzma_index is modified only on success.
extern LZMA_API(lzma_ret)
lzma_index_append(lzma_index *i, const lzma_allocator *allocator,
		lzma_vli unpadded_size, lzma_vli uncompressed_size)
{
	// Validate.
	if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN
			|| unpadded_size > UNPADDED_SIZE_MAX
			|| uncompressed_size > LZMA_VLI_MAX)
		return LZMA_PROG_ERROR;

	index_stream *s = (index_stream *)(i->streams.rightmost);
	index_group *g = (index_group *)(s->groups.rightmost);

	// The running sums at the end of the last Record of this Stream,
	// or zero if the Stream has no Records yet.
	const lzma_vli compressed_base = g == NULL ? 0
			: vli_ceil4(g->records[g->last].unpadded_sum);
	const lzma_vli uncompressed_base = g == NULL ? 0
			: g->records[g->last].uncompressed_sum;
	const uint32_t index_list_size_add = lzma_vli_size(unpadded_size)
			+ lzma_vli_size(uncompressed_size);

	// Check that uncompressed size will not overflow.
	if (uncompressed_base + uncompressed_size > LZMA_VLI_MAX)
		return LZMA_DATA_ERROR;

	// Check that the file size will stay within limits.
	if (index_file_size(s->node.compressed_base,
			compressed_base + unpadded_size, s->record_count + 1,
			s->index_list_size + index_list_size_add,
			s->stream_padding) == LZMA_VLI_UNKNOWN)
		return LZMA_DATA_ERROR;

	// The size of the Index field must not exceed the maximum value
	// that can be stored in the Backward Size field.
	if (index_size(i->record_count + 1,
			i->index_list_size + index_list_size_add)
			> LZMA_BACKWARD_SIZE_MAX)
		return LZMA_DATA_ERROR;

	if (g != NULL && g->last + 1 < g->allocated) {
		// There is space in the last group at least for one Record.
		++g->last;
	} else {
		// We need to allocate a new group.
		g = lzma_alloc(sizeof(index_group)
				+ i->prealloc * sizeof(index_record),
				allocator);
		if (g == NULL)
			return LZMA_MEM_ERROR;

		g->last = 0;
		g->allocated = i->prealloc;

		// Reset prealloc so that if the application happens to
		// add new Records, the allocation size will be sane.
		i->prealloc = INDEX_GROUP_SIZE;

		// Set the start offsets of this group.
		g->node.uncompressed_base = uncompressed_base;
		g->node.compressed_base = compressed_base;
		g->number_base = s->record_count + 1;

		// Add the new group to the Stream.
		index_tree_append(&s->groups, &g->node);
	}

	// Add the new Record to the group.
	g->records[g->last].uncompressed_sum
			= uncompressed_base + uncompressed_size;
	g->records[g->last].unpadded_sum
			= compressed_base + unpadded_size;

	// Update the totals.
	++s->record_count;
	s->index_list_size += index_list_size_add;

	i->total_size += vli_ceil4(unpadded_size);
	i->uncompressed_size += uncompressed_size;
	++i->record_count;
	i->index_list_size += index_list_size_add;

	return LZMA_OK;
}


/// Structure to pass info to index_cat_helper()
typedef struct {
	/// Uncompressed size of the destination
	lzma_vli uncompressed_size;

	/// Compressed file size of the destination
	lzma_vli file_size;

	/// Number of Records that were in the destination index before we
	/// started appending new Streams from the source index. This is
	/// added to block_number_base of each appended Stream.
	lzma_vli block_number_add;

	/// Number of Streams that were in the destination index before we
	/// started appending new Streams from the source index. This is
	/// used to fix the Stream numbering.
	uint32_t stream_number_add;

	/// Destination index' Stream tree
	index_tree *streams;

} index_cat_info;


/// Add the Stream nodes from the source index to dest using recursion.
/// Simplest iterative traversal of the source tree wouldn't work, because
/// we update the pointers in nodes when moving them to the destination tree.
static void
index_cat_helper(const index_cat_info *info, index_stream *this)
{
	// Read the child pointers first: index_tree_append() below
	// overwrites the left and right pointers of this->node.
	index_stream *left = (index_stream *)(this->node.left);
	index_stream *right = (index_stream *)(this->node.right);

	if (left != NULL)
		index_cat_helper(info, left);

	// Shift the offsets and numbering by what dest already contains.
	this->node.uncompressed_base += info->uncompressed_size;
	this->node.compressed_base += info->file_size;
	this->number += info->stream_number_add;
	this->block_number_base += info->block_number_add;
	index_tree_append(info->streams, &this->node);

	if (right != NULL)
		index_cat_helper(info, right);

	return;
}


/// Move all the Streams from src to the end of dest.
///
/// On success src is freed and must not be used anymore. On error both
/// dest and src are left unmodified. Returns LZMA_PROG_ERROR if either
/// index is NULL, LZMA_DATA_ERROR if a size limit would be exceeded,
/// and LZMA_MEM_ERROR if memory allocation fails.
extern LZMA_API(lzma_ret)
lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
		const lzma_allocator *allocator)
{
	if (dest == NULL || src == NULL)
		return LZMA_PROG_ERROR;

	const lzma_vli dest_file_size = lzma_index_file_size(dest);

	// Check that we don't exceed the file size limits.
	if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX
			|| dest->uncompressed_size + src->uncompressed_size
				> LZMA_VLI_MAX)
		return LZMA_DATA_ERROR;

	// Check that the encoded size of the combined lzma_indexes stays
	// within limits. In theory, this should be done only if we know
	// that the user plans to actually combine the Streams and thus
	// construct a single Index (probably rare). However, exceeding
	// this limit is quite theoretical, so we do this check always
	// to simplify things elsewhere.
	{
		const lzma_vli dest_size = index_size_unpadded(
				dest->record_count, dest->index_list_size);
		const lzma_vli src_size = index_size_unpadded(
				src->record_count, src->index_list_size);
		if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
			return LZMA_DATA_ERROR;
	}

	// Optimize the last group to minimize memory usage. Allocation has
	// to be done before modifying dest or src.
	{
		index_stream *s = (index_stream *)(dest->streams.rightmost);
		index_group *g = (index_group *)(s->groups.rightmost);
		if (g != NULL && g->last + 1 < g->allocated) {
			assert(g->node.left == NULL);
			assert(g->node.right == NULL);

			// Allocate a group that has room only for the
			// Records that are actually in use.
			index_group *newg = lzma_alloc(sizeof(index_group)
					+ (g->last + 1)
					* sizeof(index_record),
					allocator);
			if (newg == NULL)
				return LZMA_MEM_ERROR;

			newg->node = g->node;
			newg->allocated = g->last + 1;
			newg->last = g->last;
			newg->number_base = g->number_base;

			memcpy(newg->records, g->records, newg->allocated
					* sizeof(index_record));

			// Make every pointer that referred to the old
			// group refer to the new one.
			if (g->node.parent != NULL) {
				assert(g->node.parent->right == &g->node);
				g->node.parent->right = &newg->node;
			}

			if (s->groups.leftmost == &g->node) {
				assert(s->groups.root == &g->node);
				s->groups.leftmost = &newg->node;
				s->groups.root = &newg->node;
			}

			assert(s->groups.rightmost == &g->node);
			s->groups.rightmost = &newg->node;

			lzma_free(g, allocator);

			// NOTE: newg isn't leaked here because
			// newg == (void *)&newg->node.
		}
	}

	// dest->checks includes the check types of all except the last Stream
	// in dest. Set the bit for the check type of the last Stream now so
	// that it won't get lost when Stream(s) from src are appended to dest.
	dest->checks = lzma_index_checks(dest);

	// Add all the Streams from src to dest. Update the base offsets
	// of each Stream from src.
	const index_cat_info info = {
		.uncompressed_size = dest->uncompressed_size,
		.file_size = dest_file_size,
		.stream_number_add = dest->streams.count,
		.block_number_add = dest->record_count,
		.streams = &dest->streams,
	};
	index_cat_helper(&info, (index_stream *)(src->streams.root));

	// Update info about all the combined Streams.
	dest->uncompressed_size += src->uncompressed_size;
	dest->total_size += src->total_size;
	dest->record_count += src->record_count;
	dest->index_list_size += src->index_list_size;
	dest->checks |= src->checks;

	// There's nothing else left in src than the base structure.
	lzma_free(src, allocator);

	return LZMA_OK;
}


/// Duplicate an index_stream.
static index_stream * index_dup_stream(const index_stream *src, const lzma_allocator *allocator) { // Catch a somewhat theoretical integer overflow. if (src->record_count > PREALLOC_MAX) return NULL; // Allocate and initialize a new Stream. index_stream *dest = index_stream_init(src->node.compressed_base, src->node.uncompressed_base, src->number, src->block_number_base, allocator); if (dest == NULL) return NULL; // Copy the overall information. dest->record_count = src->record_count; dest->index_list_size = src->index_list_size; dest->stream_flags = src->stream_flags; dest->stream_padding = src->stream_padding; // Return if there are no groups to duplicate. if (src->groups.leftmost == NULL) return dest; // Allocate memory for the Records. We put all the Records into // a single group. It's simplest and also tends to make // lzma_index_locate() a little bit faster with very big Indexes. index_group *destg = lzma_alloc(sizeof(index_group) + src->record_count * sizeof(index_record), allocator); if (destg == NULL) { index_stream_end(dest, allocator); return NULL; } // Initialize destg. destg->node.uncompressed_base = 0; destg->node.compressed_base = 0; destg->number_base = 1; destg->allocated = src->record_count; destg->last = src->record_count - 1; // Go through all the groups in src and copy the Records into destg. const index_group *srcg = (const index_group *)(src->groups.leftmost); size_t i = 0; do { memcpy(destg->records + i, srcg->records, (srcg->last + 1) * sizeof(index_record)); i += srcg->last + 1; srcg = index_tree_next(&srcg->node); } while (srcg != NULL); assert(i == destg->allocated); // Add the group to the new Stream. index_tree_append(&dest->groups, &destg->node); return dest; } extern LZMA_API(lzma_index *) lzma_index_dup(const lzma_index *src, const lzma_allocator *allocator) { // Allocate the base structure (no initial Stream). lzma_index *dest = index_init_plain(allocator); if (dest == NULL) return NULL; // Copy the totals. 
dest->uncompressed_size = src->uncompressed_size; dest->total_size = src->total_size; dest->record_count = src->record_count; dest->index_list_size = src->index_list_size; // Copy the Streams and the groups in them. const index_stream *srcstream = (const index_stream *)(src->streams.leftmost); do { index_stream *deststream = index_dup_stream( srcstream, allocator); if (deststream == NULL) { lzma_index_end(dest, allocator); return NULL; } index_tree_append(&dest->streams, &deststream->node); srcstream = index_tree_next(&srcstream->node); } while (srcstream != NULL); return dest; } /// Indexing for lzma_index_iter.internal[] enum { ITER_INDEX, ITER_STREAM, ITER_GROUP, ITER_RECORD, ITER_METHOD, }; /// Values for lzma_index_iter.internal[ITER_METHOD].s enum { ITER_METHOD_NORMAL, ITER_METHOD_NEXT, ITER_METHOD_LEFTMOST, }; static void iter_set_info(lzma_index_iter *iter) { const lzma_index *i = iter->internal[ITER_INDEX].p; const index_stream *stream = iter->internal[ITER_STREAM].p; const index_group *group = iter->internal[ITER_GROUP].p; const size_t record = iter->internal[ITER_RECORD].s; // lzma_index_iter.internal must not contain a pointer to the last // group in the index, because that may be reallocated by // lzma_index_cat(). if (group == NULL) { // There are no groups. assert(stream->groups.root == NULL); iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; } else if (i->streams.rightmost != &stream->node || stream->groups.rightmost != &group->node) { // The group is not not the last group in the index. iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; } else if (stream->groups.leftmost != &group->node) { // The group isn't the only group in the Stream, thus we // know that it must have a parent group i.e. it's not // the root node. 
assert(stream->groups.root != &group->node); assert(group->node.parent->right == &group->node); iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT; iter->internal[ITER_GROUP].p = group->node.parent; } else { // The Stream has only one group. assert(stream->groups.root == &group->node); assert(group->node.parent == NULL); iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; iter->internal[ITER_GROUP].p = NULL; } // NOTE: lzma_index_iter.stream.number is lzma_vli but we use uint32_t // internally. iter->stream.number = stream->number; iter->stream.block_count = stream->record_count; iter->stream.compressed_offset = stream->node.compressed_base; iter->stream.uncompressed_offset = stream->node.uncompressed_base; // iter->stream.flags will be NULL if the Stream Flags haven't been // set with lzma_index_stream_flags(). iter->stream.flags = stream->stream_flags.version == UINT32_MAX ? NULL : &stream->stream_flags; iter->stream.padding = stream->stream_padding; if (stream->groups.rightmost == NULL) { // Stream has no Blocks. iter->stream.compressed_size = index_size(0, 0) + 2 * LZMA_STREAM_HEADER_SIZE; iter->stream.uncompressed_size = 0; } else { const index_group *g = (const index_group *)( stream->groups.rightmost); // Stream Header + Stream Footer + Index + Blocks iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE + index_size(stream->record_count, stream->index_list_size) + vli_ceil4(g->records[g->last].unpadded_sum); iter->stream.uncompressed_size = g->records[g->last].uncompressed_sum; } if (group != NULL) { iter->block.number_in_stream = group->number_base + record; iter->block.number_in_file = iter->block.number_in_stream + stream->block_number_base; iter->block.compressed_stream_offset = record == 0 ? group->node.compressed_base : vli_ceil4(group->records[ record - 1].unpadded_sum); iter->block.uncompressed_stream_offset = record == 0 ? 
group->node.uncompressed_base : group->records[record - 1].uncompressed_sum; iter->block.uncompressed_size = group->records[record].uncompressed_sum - iter->block.uncompressed_stream_offset; iter->block.unpadded_size = group->records[record].unpadded_sum - iter->block.compressed_stream_offset; iter->block.total_size = vli_ceil4(iter->block.unpadded_size); iter->block.compressed_stream_offset += LZMA_STREAM_HEADER_SIZE; iter->block.compressed_file_offset = iter->block.compressed_stream_offset + iter->stream.compressed_offset; iter->block.uncompressed_file_offset = iter->block.uncompressed_stream_offset + iter->stream.uncompressed_offset; } return; } extern LZMA_API(void) lzma_index_iter_init(lzma_index_iter *iter, const lzma_index *i) { iter->internal[ITER_INDEX].p = i; lzma_index_iter_rewind(iter); return; } extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) { iter->internal[ITER_STREAM].p = NULL; iter->internal[ITER_GROUP].p = NULL; iter->internal[ITER_RECORD].s = 0; iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; return; } extern LZMA_API(lzma_bool) lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode) { // Catch unsupported mode values. if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK) return true; const lzma_index *i = iter->internal[ITER_INDEX].p; const index_stream *stream = iter->internal[ITER_STREAM].p; const index_group *group = NULL; size_t record = iter->internal[ITER_RECORD].s; // If we are being asked for the next Stream, leave group to NULL // so that the rest of the this function thinks that this Stream // has no groups and will thus go to the next Stream. if (mode != LZMA_INDEX_ITER_STREAM) { // Get the pointer to the current group. See iter_set_inf() // for explanation. 
switch (iter->internal[ITER_METHOD].s) { case ITER_METHOD_NORMAL: group = iter->internal[ITER_GROUP].p; break; case ITER_METHOD_NEXT: group = index_tree_next(iter->internal[ITER_GROUP].p); break; case ITER_METHOD_LEFTMOST: group = (const index_group *)( stream->groups.leftmost); break; } } again: if (stream == NULL) { // We at the beginning of the lzma_index. // Locate the first Stream. stream = (const index_stream *)(i->streams.leftmost); if (mode >= LZMA_INDEX_ITER_BLOCK) { // Since we are being asked to return information // about the first a Block, skip Streams that have // no Blocks. while (stream->groups.leftmost == NULL) { stream = index_tree_next(&stream->node); if (stream == NULL) return true; } } // Start from the first Record in the Stream. group = (const index_group *)(stream->groups.leftmost); record = 0; } else if (group != NULL && record < group->last) { // The next Record is in the same group. ++record; } else { // This group has no more Records or this Stream has // no Blocks at all. record = 0; // If group is not NULL, this Stream has at least one Block // and thus at least one group. Find the next group. if (group != NULL) group = index_tree_next(&group->node); if (group == NULL) { // This Stream has no more Records. Find the next // Stream. If we are being asked to return information // about a Block, we skip empty Streams. do { stream = index_tree_next(&stream->node); if (stream == NULL) return true; } while (mode >= LZMA_INDEX_ITER_BLOCK && stream->groups.leftmost == NULL); group = (const index_group *)( stream->groups.leftmost); } } if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) { // We need to look for the next Block again if this Block // is empty. 
if (record == 0) { if (group->node.uncompressed_base == group->records[0].uncompressed_sum) goto again; } else if (group->records[record - 1].uncompressed_sum == group->records[record].uncompressed_sum) { goto again; } } iter->internal[ITER_STREAM].p = stream; iter->internal[ITER_GROUP].p = group; iter->internal[ITER_RECORD].s = record; iter_set_info(iter); return false; } extern LZMA_API(lzma_bool) lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target) { const lzma_index *i = iter->internal[ITER_INDEX].p; // If the target is past the end of the file, return immediately. if (i->uncompressed_size <= target) return true; // Locate the Stream containing the target offset. const index_stream *stream = index_tree_locate(&i->streams, target); assert(stream != NULL); target -= stream->node.uncompressed_base; // Locate the group containing the target offset. const index_group *group = index_tree_locate(&stream->groups, target); assert(group != NULL); // Use binary search to locate the exact Record. It is the first // Record whose uncompressed_sum is greater than target. // This is because we want the rightmost Record that fulfills the // search criterion. It is possible that there are empty Blocks; // we don't want to return them. 
size_t left = 0; size_t right = group->last; while (left < right) { const size_t pos = left + (right - left) / 2; if (group->records[pos].uncompressed_sum <= target) left = pos + 1; else right = pos; } iter->internal[ITER_STREAM].p = stream; iter->internal[ITER_GROUP].p = group; iter->internal[ITER_RECORD].s = left; iter_set_info(iter); return false; } diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h index 64e97247dd33..031efcc718f2 100644 --- a/src/liblzma/common/index.h +++ b/src/liblzma/common/index.h @@ -1,73 +1,81 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index.h /// \brief Handling of Index +/// \note This header file does not include common.h or lzma.h because +/// this file is needed by both liblzma internally and by the +/// tests. Including common.h will include and define many things +/// the tests do not need and prevents issues with header file +/// include order. This way, if lzma.h or common.h are not +/// included before this file it will break on every OS instead +/// of causing more subtle errors. // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_INDEX_H #define LZMA_INDEX_H -#include "common.h" - /// Minimum Unpadded Size #define UNPADDED_SIZE_MIN LZMA_VLI_C(5) /// Maximum Unpadded Size #define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) +/// Index Indicator based on xz specification +#define INDEX_INDICATOR 0 + /// Get the size of the Index Padding field. This is needed by Index encoder /// and decoder, but applications should have no use for this. extern uint32_t lzma_index_padding_size(const lzma_index *i); /// Set for how many Records to allocate memory the next time /// lzma_index_append() needs to allocate space for a new Record. /// This is used only by the Index decoder. 
extern void lzma_index_prealloc(lzma_index *i, lzma_vli records); /// Round the variable-length integer to the next multiple of four. static inline lzma_vli vli_ceil4(lzma_vli vli) { assert(vli <= LZMA_VLI_MAX); return (vli + 3) & ~LZMA_VLI_C(3); } /// Calculate the size of the Index field excluding Index Padding static inline lzma_vli index_size_unpadded(lzma_vli count, lzma_vli index_list_size) { // Index Indicator + Number of Records + List of Records + CRC32 return 1 + lzma_vli_size(count) + index_list_size + 4; } /// Calculate the size of the Index field including Index Padding static inline lzma_vli index_size(lzma_vli count, lzma_vli index_list_size) { return vli_ceil4(index_size_unpadded(count, index_list_size)); } /// Calculate the total size of the Stream static inline lzma_vli index_stream_size(lzma_vli blocks_size, lzma_vli count, lzma_vli index_list_size) { return LZMA_STREAM_HEADER_SIZE + blocks_size + index_size(count, index_list_size) + LZMA_STREAM_HEADER_SIZE; } #endif diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c index b26898853381..8622b2f07704 100644 --- a/src/liblzma/common/index_decoder.c +++ b/src/liblzma/common/index_decoder.c @@ -1,355 +1,355 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index_decoder.c /// \brief Decodes the Index field // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "index_decoder.h" #include "check.h" typedef struct { enum { SEQ_INDICATOR, SEQ_COUNT, SEQ_MEMUSAGE, SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, SEQ_CRC32, } sequence; /// Memory usage limit uint64_t memlimit; /// Target Index lzma_index *index; /// Pointer give by the application, which is set after /// successful decoding. lzma_index **index_ptr; /// Number of Records left to decode. 
lzma_vli count; /// The most recent Unpadded Size field lzma_vli unpadded_size; /// The most recent Uncompressed Size field lzma_vli uncompressed_size; /// Position in integers size_t pos; /// CRC32 of the List of Records field uint32_t crc32; } lzma_index_coder; static lzma_ret index_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out lzma_attribute((__unused__)), size_t *restrict out_pos lzma_attribute((__unused__)), size_t out_size lzma_attribute((__unused__)), lzma_action action lzma_attribute((__unused__))) { lzma_index_coder *coder = coder_ptr; // Similar optimization as in index_encoder.c const size_t in_start = *in_pos; lzma_ret ret = LZMA_OK; while (*in_pos < in_size) switch (coder->sequence) { case SEQ_INDICATOR: // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or // LZMA_FORMAT_ERROR, because a typical usage case for Index // decoder is when parsing the Stream backwards. If seeking // backward from the Stream Footer gives us something that // doesn't begin with Index Indicator, the file is considered // corrupt, not "programming error" or "unrecognized file // format". One could argue that the application should // verify the Index Indicator before trying to decode the // Index, but well, I suppose it is simpler this way. - if (in[(*in_pos)++] != 0x00) + if (in[(*in_pos)++] != INDEX_INDICATOR) return LZMA_DATA_ERROR; coder->sequence = SEQ_COUNT; break; case SEQ_COUNT: ret = lzma_vli_decode(&coder->count, &coder->pos, in, in_pos, in_size); if (ret != LZMA_STREAM_END) goto out; coder->pos = 0; coder->sequence = SEQ_MEMUSAGE; // Fall through case SEQ_MEMUSAGE: if (lzma_index_memusage(1, coder->count) > coder->memlimit) { ret = LZMA_MEMLIMIT_ERROR; goto out; } // Tell the Index handling code how many Records this // Index has to allow it to allocate memory more efficiently. 
lzma_index_prealloc(coder->index, coder->count); ret = LZMA_OK; coder->sequence = coder->count == 0 ? SEQ_PADDING_INIT : SEQ_UNPADDED; break; case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { lzma_vli *size = coder->sequence == SEQ_UNPADDED ? &coder->unpadded_size : &coder->uncompressed_size; ret = lzma_vli_decode(size, &coder->pos, in, in_pos, in_size); if (ret != LZMA_STREAM_END) goto out; ret = LZMA_OK; coder->pos = 0; if (coder->sequence == SEQ_UNPADDED) { // Validate that encoded Unpadded Size isn't too small // or too big. if (coder->unpadded_size < UNPADDED_SIZE_MIN || coder->unpadded_size > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; coder->sequence = SEQ_UNCOMPRESSED; } else { // Add the decoded Record to the Index. return_if_error(lzma_index_append( coder->index, allocator, coder->unpadded_size, coder->uncompressed_size)); // Check if this was the last Record. coder->sequence = --coder->count == 0 ? SEQ_PADDING_INIT : SEQ_UNPADDED; } break; } case SEQ_PADDING_INIT: coder->pos = lzma_index_padding_size(coder->index); coder->sequence = SEQ_PADDING; // Fall through case SEQ_PADDING: if (coder->pos > 0) { --coder->pos; if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; break; } // Finish the CRC32 calculation. coder->crc32 = lzma_crc32(in + in_start, *in_pos - in_start, coder->crc32); coder->sequence = SEQ_CRC32; // Fall through case SEQ_CRC32: do { if (*in_pos == in_size) return LZMA_OK; if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) != in[(*in_pos)++]) { #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION return LZMA_DATA_ERROR; #endif } } while (++coder->pos < 4); // Decoding was successful, now we can let the application // see the decoded Index. *coder->index_ptr = coder->index; // Make index NULL so we don't free it unintentionally. 
coder->index = NULL; return LZMA_STREAM_END; default: assert(0); return LZMA_PROG_ERROR; } out: // Update the CRC32, coder->crc32 = lzma_crc32(in + in_start, *in_pos - in_start, coder->crc32); return ret; } static void index_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_index_coder *coder = coder_ptr; lzma_index_end(coder->index, allocator); lzma_free(coder, allocator); return; } static lzma_ret index_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { lzma_index_coder *coder = coder_ptr; *memusage = lzma_index_memusage(1, coder->count); *old_memlimit = coder->memlimit; if (new_memlimit != 0) { if (new_memlimit < *memusage) return LZMA_MEMLIMIT_ERROR; coder->memlimit = new_memlimit; } return LZMA_OK; } static lzma_ret index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator, lzma_index **i, uint64_t memlimit) { // Remember the pointer given by the application. We will set it // to point to the decoded Index only if decoding is successful. // Before that, keep it NULL so that applications can always safely // pass it to lzma_index_end() no matter did decoding succeed or not. coder->index_ptr = i; *i = NULL; // We always allocate a new lzma_index. coder->index = lzma_index_init(allocator); if (coder->index == NULL) return LZMA_MEM_ERROR; // Initialize the rest. coder->sequence = SEQ_INDICATOR; coder->memlimit = my_max(1, memlimit); coder->count = 0; // Needs to be initialized due to _memconfig(). 
coder->pos = 0; coder->crc32 = 0; return LZMA_OK; } extern lzma_ret lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_index **i, uint64_t memlimit) { lzma_next_coder_init(&lzma_index_decoder_init, next, allocator); if (i == NULL) return LZMA_PROG_ERROR; lzma_index_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_index_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &index_decode; next->end = &index_decoder_end; next->memconfig = &index_decoder_memconfig; coder->index = NULL; } else { lzma_index_end(coder->index, allocator); } return index_decoder_reset(coder, allocator, i, memlimit); } extern LZMA_API(lzma_ret) lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) { lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size) { // Sanity checks if (i == NULL || memlimit == NULL || in == NULL || in_pos == NULL || *in_pos > in_size) return LZMA_PROG_ERROR; // Initialize the decoder. lzma_index_coder coder; return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); // Store the input start position so that we can restore it in case // of an error. const size_t in_start = *in_pos; // Do the actual decoding. lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); if (ret == LZMA_STREAM_END) { ret = LZMA_OK; } else { // Something went wrong, free the Index structure and restore // the input position. lzma_index_end(coder.index, allocator); *in_pos = in_start; if (ret == LZMA_OK) { // The input is truncated or otherwise corrupt. 
// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR // like lzma_vli_decode() does in single-call mode. ret = LZMA_DATA_ERROR; } else if (ret == LZMA_MEMLIMIT_ERROR) { // Tell the caller how much memory would have // been needed. *memlimit = lzma_index_memusage(1, coder.count); } } return ret; } diff --git a/src/liblzma/common/index_decoder.h b/src/liblzma/common/index_decoder.h index 1af433b58b56..3fec483331a6 100644 --- a/src/liblzma/common/index_decoder.h +++ b/src/liblzma/common/index_decoder.h @@ -1,24 +1,25 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index_decoder.h /// \brief Decodes the Index field // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_INDEX_DECODER_H #define LZMA_INDEX_DECODER_H +#include "common.h" #include "index.h" extern lzma_ret lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_index **i, uint64_t memlimit); #endif diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c index ac97d0cebf81..c7cafb72decc 100644 --- a/src/liblzma/common/index_encoder.c +++ b/src/liblzma/common/index_encoder.c @@ -1,256 +1,256 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index_encoder.c /// \brief Encodes the Index field // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "index_encoder.h" #include "index.h" #include "check.h" typedef struct { enum { SEQ_INDICATOR, SEQ_COUNT, SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_NEXT, SEQ_PADDING, SEQ_CRC32, } sequence; /// Index being encoded const lzma_index *index; /// Iterator for the Index being encoded lzma_index_iter iter; /// Position in integers size_t pos; /// CRC32 of the List of Records field uint32_t crc32; } lzma_index_coder; static lzma_ret index_encode(void *coder_ptr, const lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in lzma_attribute((__unused__)), size_t *restrict in_pos lzma_attribute((__unused__)), size_t in_size lzma_attribute((__unused__)), uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action lzma_attribute((__unused__))) { lzma_index_coder *coder = coder_ptr; // Position where to start calculating CRC32. The idea is that we // need to call lzma_crc32() only once per call to index_encode(). const size_t out_start = *out_pos; // Return value to use if we return at the end of this function. // We use "goto out" to jump out of the while-switch construct // instead of returning directly, because that way we don't need // to copypaste the lzma_crc32() call to many places. lzma_ret ret = LZMA_OK; while (*out_pos < out_size) switch (coder->sequence) { case SEQ_INDICATOR: - out[*out_pos] = 0x00; + out[*out_pos] = INDEX_INDICATOR; ++*out_pos; coder->sequence = SEQ_COUNT; break; case SEQ_COUNT: { const lzma_vli count = lzma_index_block_count(coder->index); ret = lzma_vli_encode(count, &coder->pos, out, out_pos, out_size); if (ret != LZMA_STREAM_END) goto out; ret = LZMA_OK; coder->pos = 0; coder->sequence = SEQ_NEXT; break; } case SEQ_NEXT: if (lzma_index_iter_next( &coder->iter, LZMA_INDEX_ITER_BLOCK)) { // Get the size of the Index Padding field. 
coder->pos = lzma_index_padding_size(coder->index); assert(coder->pos <= 3); coder->sequence = SEQ_PADDING; break; } coder->sequence = SEQ_UNPADDED; // Fall through case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { const lzma_vli size = coder->sequence == SEQ_UNPADDED ? coder->iter.block.unpadded_size : coder->iter.block.uncompressed_size; ret = lzma_vli_encode(size, &coder->pos, out, out_pos, out_size); if (ret != LZMA_STREAM_END) goto out; ret = LZMA_OK; coder->pos = 0; // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. ++coder->sequence; break; } case SEQ_PADDING: if (coder->pos > 0) { --coder->pos; out[(*out_pos)++] = 0x00; break; } // Finish the CRC32 calculation. coder->crc32 = lzma_crc32(out + out_start, *out_pos - out_start, coder->crc32); coder->sequence = SEQ_CRC32; // Fall through case SEQ_CRC32: // We don't use the main loop, because we don't want // coder->crc32 to be touched anymore. do { if (*out_pos == out_size) return LZMA_OK; out[*out_pos] = (coder->crc32 >> (coder->pos * 8)) & 0xFF; ++*out_pos; } while (++coder->pos < 4); return LZMA_STREAM_END; default: assert(0); return LZMA_PROG_ERROR; } out: // Update the CRC32. 
coder->crc32 = lzma_crc32(out + out_start, *out_pos - out_start, coder->crc32); return ret; } static void index_encoder_end(void *coder, const lzma_allocator *allocator) { lzma_free(coder, allocator); return; } static void index_encoder_reset(lzma_index_coder *coder, const lzma_index *i) { lzma_index_iter_init(&coder->iter, i); coder->sequence = SEQ_INDICATOR; coder->index = i; coder->pos = 0; coder->crc32 = 0; return; } extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_index *i) { lzma_next_coder_init(&lzma_index_encoder_init, next, allocator); if (i == NULL) return LZMA_PROG_ERROR; if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_index_coder), allocator); if (next->coder == NULL) return LZMA_MEM_ERROR; next->code = &index_encode; next->end = &index_encoder_end; } index_encoder_reset(next->coder, i); return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_index_encoder(lzma_stream *strm, const lzma_index *i) { lzma_next_strm_init(lzma_index_encoder_init, strm, i); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, uint8_t *out, size_t *out_pos, size_t out_size) { // Validate the arguments. if (i == NULL || out == NULL || out_pos == NULL || *out_pos > out_size) return LZMA_PROG_ERROR; // Don't try to encode if there's not enough output space. if (out_size - *out_pos < lzma_index_size(i)) return LZMA_BUF_ERROR; // The Index encoder needs just one small data structure so we can // allocate it on stack. lzma_index_coder coder; index_encoder_reset(&coder, i); // Do the actual encoding. This should never fail, but store // the original *out_pos just in case. 
const size_t out_start = *out_pos; lzma_ret ret = index_encode(&coder, NULL, NULL, NULL, 0, out, out_pos, out_size, LZMA_RUN); if (ret == LZMA_STREAM_END) { ret = LZMA_OK; } else { // We should never get here, but just in case, restore the // output position and set the error accordingly if something // goes wrong and debugging isn't enabled. assert(0); *out_pos = out_start; ret = LZMA_PROG_ERROR; } return ret; } diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c index 34df85d72f94..f55f7bc8ff8f 100644 --- a/src/liblzma/common/index_hash.c +++ b/src/liblzma/common/index_hash.c @@ -1,336 +1,336 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index_hash.c /// \brief Validates Index by using a hash function // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "common.h" #include "index.h" #include "check.h" typedef struct { /// Sum of the Block sizes (including Block Padding) lzma_vli blocks_size; /// Sum of the Uncompressed Size fields lzma_vli uncompressed_size; /// Number of Records lzma_vli count; /// Size of the List of Index Records as bytes lzma_vli index_list_size; /// Check calculated from Unpadded Sizes and Uncompressed Sizes. lzma_check_state check; } lzma_index_hash_info; struct lzma_index_hash_s { enum { SEQ_BLOCK, SEQ_COUNT, SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, SEQ_CRC32, } sequence; /// Information collected while decoding the actual Blocks. lzma_index_hash_info blocks; /// Information collected from the Index field. lzma_index_hash_info records; /// Number of Records not fully decoded lzma_vli remaining; /// Unpadded Size currently being read from an Index Record. lzma_vli unpadded_size; /// Uncompressed Size currently being read from an Index Record. 
lzma_vli uncompressed_size; /// Position in variable-length integers when decoding them from /// the List of Records. size_t pos; /// CRC32 of the Index uint32_t crc32; }; extern LZMA_API(lzma_index_hash *) lzma_index_hash_init(lzma_index_hash *index_hash, const lzma_allocator *allocator) { if (index_hash == NULL) { index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); if (index_hash == NULL) return NULL; } index_hash->sequence = SEQ_BLOCK; index_hash->blocks.blocks_size = 0; index_hash->blocks.uncompressed_size = 0; index_hash->blocks.count = 0; index_hash->blocks.index_list_size = 0; index_hash->records.blocks_size = 0; index_hash->records.uncompressed_size = 0; index_hash->records.count = 0; index_hash->records.index_list_size = 0; index_hash->unpadded_size = 0; index_hash->uncompressed_size = 0; index_hash->pos = 0; index_hash->crc32 = 0; // These cannot fail because LZMA_CHECK_BEST is known to be supported. (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); return index_hash; } extern LZMA_API(void) lzma_index_hash_end(lzma_index_hash *index_hash, const lzma_allocator *allocator) { lzma_free(index_hash, allocator); return; } extern LZMA_API(lzma_vli) lzma_index_hash_size(const lzma_index_hash *index_hash) { // Get the size of the Index from ->blocks instead of ->records for // cases where application wants to know the Index Size before // decoding the Index. return index_size(index_hash->blocks.count, index_hash->blocks.index_list_size); } /// Updates the sizes and the hash without any validation. 
static void hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, lzma_vli uncompressed_size) { info->blocks_size += vli_ceil4(unpadded_size); info->uncompressed_size += uncompressed_size; info->index_list_size += lzma_vli_size(unpadded_size) + lzma_vli_size(uncompressed_size); ++info->count; const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; lzma_check_update(&info->check, LZMA_CHECK_BEST, (const uint8_t *)(sizes), sizeof(sizes)); return; } extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, lzma_vli uncompressed_size) { // Validate the arguments. - if (index_hash->sequence != SEQ_BLOCK + if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK || unpadded_size < UNPADDED_SIZE_MIN || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) return LZMA_PROG_ERROR; // Update the hash. hash_append(&index_hash->blocks, unpadded_size, uncompressed_size); // Validate the properties of *info are still in allowed limits. if (index_hash->blocks.blocks_size > LZMA_VLI_MAX || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX || index_size(index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_BACKWARD_SIZE_MAX || index_stream_size(index_hash->blocks.blocks_size, index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_VLI_MAX) return LZMA_DATA_ERROR; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, size_t *in_pos, size_t in_size) { // Catch zero input buffer here, because in contrast to Index encoder // and decoder functions, applications call this function directly // instead of via lzma_code(), which does the buffer checking. if (*in_pos >= in_size) return LZMA_BUF_ERROR; // NOTE: This function has many similarities to index_encode() and // index_decode() functions found from index_encoder.c and // index_decoder.c. See the comments especially in index_encoder.c. 
const size_t in_start = *in_pos; lzma_ret ret = LZMA_OK; while (*in_pos < in_size) switch (index_hash->sequence) { case SEQ_BLOCK: // Check the Index Indicator is present. - if (in[(*in_pos)++] != 0x00) + if (in[(*in_pos)++] != INDEX_INDICATOR) return LZMA_DATA_ERROR; index_hash->sequence = SEQ_COUNT; break; case SEQ_COUNT: { ret = lzma_vli_decode(&index_hash->remaining, &index_hash->pos, in, in_pos, in_size); if (ret != LZMA_STREAM_END) goto out; // The count must match the count of the Blocks decoded. if (index_hash->remaining != index_hash->blocks.count) return LZMA_DATA_ERROR; ret = LZMA_OK; index_hash->pos = 0; // Handle the special case when there are no Blocks. index_hash->sequence = index_hash->remaining == 0 ? SEQ_PADDING_INIT : SEQ_UNPADDED; break; } case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { lzma_vli *size = index_hash->sequence == SEQ_UNPADDED ? &index_hash->unpadded_size : &index_hash->uncompressed_size; ret = lzma_vli_decode(size, &index_hash->pos, in, in_pos, in_size); if (ret != LZMA_STREAM_END) goto out; ret = LZMA_OK; index_hash->pos = 0; if (index_hash->sequence == SEQ_UNPADDED) { if (index_hash->unpadded_size < UNPADDED_SIZE_MIN || index_hash->unpadded_size > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; index_hash->sequence = SEQ_UNCOMPRESSED; } else { // Update the hash. hash_append(&index_hash->records, index_hash->unpadded_size, index_hash->uncompressed_size); // Verify that we don't go over the known sizes. Note // that this validation is simpler than the one used // in lzma_index_hash_append(), because here we know // that values in index_hash->blocks are already // validated and we are fine as long as we don't // exceed them in index_hash->records. if (index_hash->blocks.blocks_size < index_hash->records.blocks_size || index_hash->blocks.uncompressed_size < index_hash->records.uncompressed_size || index_hash->blocks.index_list_size < index_hash->records.index_list_size) return LZMA_DATA_ERROR; // Check if this was the last Record. 
index_hash->sequence = --index_hash->remaining == 0 ? SEQ_PADDING_INIT : SEQ_UNPADDED; } break; } case SEQ_PADDING_INIT: index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( index_hash->records.count, index_hash->records.index_list_size)) & 3; index_hash->sequence = SEQ_PADDING; // Fall through case SEQ_PADDING: if (index_hash->pos > 0) { --index_hash->pos; if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; break; } // Compare the sizes. if (index_hash->blocks.blocks_size != index_hash->records.blocks_size || index_hash->blocks.uncompressed_size != index_hash->records.uncompressed_size || index_hash->blocks.index_list_size != index_hash->records.index_list_size) return LZMA_DATA_ERROR; // Finish the hashes and compare them. lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); if (memcmp(index_hash->blocks.check.buffer.u8, index_hash->records.check.buffer.u8, lzma_check_size(LZMA_CHECK_BEST)) != 0) return LZMA_DATA_ERROR; // Finish the CRC32 calculation. 
index_hash->crc32 = lzma_crc32(in + in_start, *in_pos - in_start, index_hash->crc32); index_hash->sequence = SEQ_CRC32; // Fall through case SEQ_CRC32: do { if (*in_pos == in_size) return LZMA_OK; if (((index_hash->crc32 >> (index_hash->pos * 8)) & 0xFF) != in[(*in_pos)++]) { #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION return LZMA_DATA_ERROR; #endif } } while (++index_hash->pos < 4); return LZMA_STREAM_END; default: assert(0); return LZMA_PROG_ERROR; } out: // Update the CRC32, index_hash->crc32 = lzma_crc32(in + in_start, *in_pos - in_start, index_hash->crc32); return ret; } diff --git a/src/liblzma/common/microlzma_encoder.c b/src/liblzma/common/microlzma_encoder.c index d3ef0632ddb3..a787ca25b839 100644 --- a/src/liblzma/common/microlzma_encoder.c +++ b/src/liblzma/common/microlzma_encoder.c @@ -1,140 +1,141 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file microlzma_encoder.c /// \brief Encode into MicroLZMA format // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "lzma_encoder.h" typedef struct { /// LZMA1 encoder lzma_next_coder lzma; /// LZMA properties byte (lc/lp/pb) uint8_t props; } lzma_microlzma_coder; static lzma_ret microlzma_encode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_microlzma_coder *coder = coder_ptr; // Remember *out_pos so that we can overwrite the first byte with // the LZMA properties byte. const size_t out_start = *out_pos; // Remember *in_pos so that we can set it based on how many // uncompressed bytes were actually encoded. const size_t in_start = *in_pos; // Set the output size limit based on the available output space. 
// We know that the encoder supports set_out_limit() so // LZMA_OPTIONS_ERROR isn't possible. LZMA_BUF_ERROR is possible // but lzma_code() has an assertion to not allow it to be returned // from here and I don't want to change that for now, so // LZMA_BUF_ERROR becomes LZMA_PROG_ERROR. uint64_t uncomp_size; if (coder->lzma.set_out_limit(coder->lzma.coder, &uncomp_size, out_size - *out_pos) != LZMA_OK) return LZMA_PROG_ERROR; // set_out_limit fails if this isn't true. assert(out_size - *out_pos >= 6); // Encode as much as possible. const lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); if (ret != LZMA_STREAM_END) { if (ret == LZMA_OK) { assert(0); return LZMA_PROG_ERROR; } return ret; } // The first output byte is bitwise-negation of the properties byte. // We know that there is space for this byte because set_out_limit // and the actual encoding succeeded. out[out_start] = (uint8_t)(~coder->props); // The LZMA encoder likely read more input than it was able to encode. // Set *in_pos based on uncomp_size. assert(uncomp_size <= in_size - in_start); *in_pos = in_start + (size_t)(uncomp_size); return ret; } static void microlzma_encoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_microlzma_coder *coder = coder_ptr; lzma_next_end(&coder->lzma, allocator); lzma_free(coder, allocator); return; } static lzma_ret microlzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_options_lzma *options) { lzma_next_coder_init(&microlzma_encoder_init, next, allocator); lzma_microlzma_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &microlzma_encode; next->end = &microlzma_encoder_end; coder->lzma = LZMA_NEXT_CODER_INIT; } // Encode the properties byte. Bitwise-negation of it will be the // first output byte.
- return_if_error(lzma_lzma_lclppb_encode(options, &coder->props)); + if (lzma_lzma_lclppb_encode(options, &coder->props)) + return LZMA_OPTIONS_ERROR; // Initialize the LZMA encoder. const lzma_filter_info filters[2] = { { .id = LZMA_FILTER_LZMA1, .init = &lzma_lzma_encoder_init, .options = (void *)(options), }, { .init = NULL, } }; return lzma_next_filter_init(&coder->lzma, allocator, filters); } extern LZMA_API(lzma_ret) lzma_microlzma_encoder(lzma_stream *strm, const lzma_options_lzma *options) { lzma_next_strm_init(microlzma_encoder_init, strm, options); strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/src/liblzma/common/stream_buffer_encoder.c b/src/liblzma/common/stream_buffer_encoder.c index af49554a6b0c..73157590e6f4 100644 --- a/src/liblzma/common/stream_buffer_encoder.c +++ b/src/liblzma/common/stream_buffer_encoder.c @@ -1,141 +1,142 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_buffer_encoder.c /// \brief Single-call .xz Stream encoder // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// +#include "common.h" #include "index.h" /// Maximum size of Index that has exactly one Record. /// Index Indicator + Number of Records + Record + CRC32 rounded up to /// the next multiple of four. #define INDEX_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 4 + 3) & ~3) /// Stream Header, Stream Footer, and Index #define HEADERS_BOUND (2 * LZMA_STREAM_HEADER_SIZE + INDEX_BOUND) extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) { // Get the maximum possible size of a Block. 
const size_t block_bound = lzma_block_buffer_bound(uncompressed_size); if (block_bound == 0) return 0; // Catch the possible integer overflow and also prevent the size of // the Stream exceeding LZMA_VLI_MAX (theoretically possible on // 64-bit systems). if (my_min(SIZE_MAX, LZMA_VLI_MAX) - block_bound < HEADERS_BOUND) return 0; return block_bound + HEADERS_BOUND; } extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos_ptr, size_t out_size) { // Sanity checks if (filters == NULL || (unsigned int)(check) > LZMA_CHECK_ID_MAX || (in == NULL && in_size != 0) || out == NULL || out_pos_ptr == NULL || *out_pos_ptr > out_size) return LZMA_PROG_ERROR; if (!lzma_check_is_supported(check)) return LZMA_UNSUPPORTED_CHECK; // Note for the paranoids: Index encoder prevents the Stream from // getting too big and still being accepted with LZMA_OK, and Block // encoder catches if the input is too big. So we don't need to // separately check if the buffers are too big. // Use a local copy. We update *out_pos_ptr only if everything // succeeds. size_t out_pos = *out_pos_ptr; // Check that there's enough space for both Stream Header and // Stream Footer. if (out_size - out_pos <= 2 * LZMA_STREAM_HEADER_SIZE) return LZMA_BUF_ERROR; // Reserve space for Stream Footer so we don't need to check for // available space again before encoding Stream Footer. out_size -= LZMA_STREAM_HEADER_SIZE; // Encode the Stream Header. lzma_stream_flags stream_flags = { .version = 0, .check = check, }; if (lzma_stream_header_encode(&stream_flags, out + out_pos) != LZMA_OK) return LZMA_PROG_ERROR; out_pos += LZMA_STREAM_HEADER_SIZE; // Encode a Block but only if there is at least one byte of input. 
lzma_block block = { .version = 0, .check = check, .filters = filters, }; if (in_size > 0) return_if_error(lzma_block_buffer_encode(&block, allocator, in, in_size, out, &out_pos, out_size)); // Index { // Create an Index. It will have one Record if there was // at least one byte of input to encode. Otherwise the // Index will be empty. lzma_index *i = lzma_index_init(allocator); if (i == NULL) return LZMA_MEM_ERROR; lzma_ret ret = LZMA_OK; if (in_size > 0) ret = lzma_index_append(i, allocator, lzma_block_unpadded_size(&block), block.uncompressed_size); // If adding the Record was successful, encode the Index // and get its size which will be stored into Stream Footer. if (ret == LZMA_OK) { ret = lzma_index_buffer_encode( i, out, &out_pos, out_size); stream_flags.backward_size = lzma_index_size(i); } lzma_index_end(i, allocator); if (ret != LZMA_OK) return ret; } // Stream Footer. We have already reserved space for this. if (lzma_stream_footer_encode(&stream_flags, out + out_pos) != LZMA_OK) return LZMA_PROG_ERROR; out_pos += LZMA_STREAM_HEADER_SIZE; // Everything went fine, make the new output position available // to the application. *out_pos_ptr = out_pos; return LZMA_OK; } diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index dcf7c1499f7e..64283812f29a 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -1,473 +1,474 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_decoder.c /// \brief Decodes .xz Streams // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "stream_decoder.h" #include "block_decoder.h" +#include "index.h" typedef struct { enum { SEQ_STREAM_HEADER, SEQ_BLOCK_HEADER, SEQ_BLOCK_INIT, SEQ_BLOCK_RUN, SEQ_INDEX, SEQ_STREAM_FOOTER, SEQ_STREAM_PADDING, } sequence; /// Block decoder lzma_next_coder block_decoder; /// Block options decoded by the Block Header decoder and used by /// the Block decoder. lzma_block block_options; /// Stream Flags from Stream Header lzma_stream_flags stream_flags; /// Index is hashed so that it can be compared to the sizes of Blocks /// with O(1) memory usage. lzma_index_hash *index_hash; /// Memory usage limit uint64_t memlimit; /// Amount of memory actually needed (only an estimate) uint64_t memusage; /// If true, LZMA_NO_CHECK is returned if the Stream has /// no integrity check. bool tell_no_check; /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has /// an integrity check that isn't supported by this liblzma build. bool tell_unsupported_check; /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. bool tell_any_check; /// If true, we will tell the Block decoder to skip calculating /// and verifying the integrity check. bool ignore_check; /// If true, we will decode concatenated Streams that possibly have /// Stream Padding between or after them. LZMA_STREAM_END is returned /// once the application isn't giving us any new input (LZMA_FINISH), /// and we aren't in the middle of a Stream, and possible /// Stream Padding is a multiple of four bytes. bool concatenated; /// When decoding concatenated Streams, this is true as long as we /// are decoding the first Stream. This is needed to avoid misleading /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic /// bytes. bool first_stream; /// Write position in buffer[] and position in Stream Padding size_t pos; /// Buffer to hold Stream Header, Block Header, and Stream Footer. 
/// Block Header has biggest maximum size. uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; } lzma_stream_coder; static lzma_ret stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator) { // Initialize the Index hash used to verify the Index. coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); if (coder->index_hash == NULL) return LZMA_MEM_ERROR; // Reset the rest of the variables. coder->sequence = SEQ_STREAM_HEADER; coder->pos = 0; return LZMA_OK; } static lzma_ret stream_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_stream_coder *coder = coder_ptr; // When decoding the actual Block, it may be able to produce more // output even if we don't give it any new input. while (true) switch (coder->sequence) { case SEQ_STREAM_HEADER: { // Copy the Stream Header to the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); // Return if we didn't get the whole Stream Header yet. if (coder->pos < LZMA_STREAM_HEADER_SIZE) return LZMA_OK; coder->pos = 0; // Decode the Stream Header. const lzma_ret ret = lzma_stream_header_decode( &coder->stream_flags, coder->buffer); if (ret != LZMA_OK) return ret == LZMA_FORMAT_ERROR && !coder->first_stream ? LZMA_DATA_ERROR : ret; // If we are decoding concatenated Streams, and the later // Streams have invalid Header Magic Bytes, we give // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. coder->first_stream = false; // Copy the type of the Check so that Block Header and Block // decoders see it. coder->block_options.check = coder->stream_flags.check; // Even if we return LZMA_*_CHECK below, we want // to continue from Block Header decoding. coder->sequence = SEQ_BLOCK_HEADER; // Detect if there's no integrity check or if it is // unsupported if those were requested by the application. 
if (coder->tell_no_check && coder->stream_flags.check == LZMA_CHECK_NONE) return LZMA_NO_CHECK; if (coder->tell_unsupported_check && !lzma_check_is_supported( coder->stream_flags.check)) return LZMA_UNSUPPORTED_CHECK; if (coder->tell_any_check) return LZMA_GET_CHECK; } // Fall through case SEQ_BLOCK_HEADER: { if (*in_pos >= in_size) return LZMA_OK; if (coder->pos == 0) { // Detect if it's Index. - if (in[*in_pos] == 0x00) { + if (in[*in_pos] == INDEX_INDICATOR) { coder->sequence = SEQ_INDEX; break; } // Calculate the size of the Block Header. Note that // Block Header decoder wants to see this byte too // so don't advance *in_pos. coder->block_options.header_size = lzma_block_header_size_decode( in[*in_pos]); } // Copy the Block Header to the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, coder->block_options.header_size); // Return if we didn't get the whole Block Header yet. if (coder->pos < coder->block_options.header_size) return LZMA_OK; coder->pos = 0; coder->sequence = SEQ_BLOCK_INIT; } // Fall through case SEQ_BLOCK_INIT: { // Checking memusage and doing the initialization needs // its own sequence point because we need to be able to // retry if we return LZMA_MEMLIMIT_ERROR. // Version 1 is needed to support the .ignore_check option. coder->block_options.version = 1; // Set up a buffer to hold the filter chain. Block Header // decoder will initialize all members of this array so // we don't need to do it here. lzma_filter filters[LZMA_FILTERS_MAX + 1]; coder->block_options.filters = filters; // Decode the Block Header. return_if_error(lzma_block_header_decode(&coder->block_options, allocator, coder->buffer)); // If LZMA_IGNORE_CHECK was used, this flag needs to be set. // It has to be set after lzma_block_header_decode() because // it always resets this to false. coder->block_options.ignore_check = coder->ignore_check; // Check the memory usage limit. 
const uint64_t memusage = lzma_raw_decoder_memusage(filters); lzma_ret ret; if (memusage == UINT64_MAX) { // One or more unknown Filter IDs. ret = LZMA_OPTIONS_ERROR; } else { // Now we can set coder->memusage since we know that // the filter chain is valid. We don't want // lzma_memusage() to return UINT64_MAX in case of // invalid filter chain. coder->memusage = memusage; if (memusage > coder->memlimit) { // The chain would need too much memory. ret = LZMA_MEMLIMIT_ERROR; } else { // Memory usage is OK. // Initialize the Block decoder. ret = lzma_block_decoder_init( &coder->block_decoder, allocator, &coder->block_options); } } // Free the allocated filter options since they are needed // only to initialize the Block decoder. lzma_filters_free(filters, allocator); coder->block_options.filters = NULL; // Check if memory usage calculation and Block decoder // initialization succeeded. if (ret != LZMA_OK) return ret; coder->sequence = SEQ_BLOCK_RUN; } // Fall through case SEQ_BLOCK_RUN: { const lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); if (ret != LZMA_STREAM_END) return ret; // Block decoded successfully. Add the new size pair to // the Index hash. return_if_error(lzma_index_hash_append(coder->index_hash, lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_HEADER; break; } case SEQ_INDEX: { // If we don't have any input, don't call // lzma_index_hash_decode() since it would return // LZMA_BUF_ERROR, which we must not do here. if (*in_pos >= in_size) return LZMA_OK; // Decode the Index and compare it to the hash calculated // from the sizes of the Blocks (if any). 
const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, in, in_pos, in_size); if (ret != LZMA_STREAM_END) return ret; coder->sequence = SEQ_STREAM_FOOTER; } // Fall through case SEQ_STREAM_FOOTER: { // Copy the Stream Footer to the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); // Return if we didn't get the whole Stream Footer yet. if (coder->pos < LZMA_STREAM_HEADER_SIZE) return LZMA_OK; coder->pos = 0; // Decode the Stream Footer. The decoder gives // LZMA_FORMAT_ERROR if the magic bytes don't match, // so convert that return code to LZMA_DATA_ERROR. lzma_stream_flags footer_flags; const lzma_ret ret = lzma_stream_footer_decode( &footer_flags, coder->buffer); if (ret != LZMA_OK) return ret == LZMA_FORMAT_ERROR ? LZMA_DATA_ERROR : ret; // Check that Index Size stored in the Stream Footer matches // the real size of the Index field. if (lzma_index_hash_size(coder->index_hash) != footer_flags.backward_size) return LZMA_DATA_ERROR; // Compare that the Stream Flags fields are identical in // both Stream Header and Stream Footer. return_if_error(lzma_stream_flags_compare( &coder->stream_flags, &footer_flags)); if (!coder->concatenated) return LZMA_STREAM_END; coder->sequence = SEQ_STREAM_PADDING; } // Fall through case SEQ_STREAM_PADDING: assert(coder->concatenated); // Skip over possible Stream Padding. while (true) { if (*in_pos >= in_size) { // Unless LZMA_FINISH was used, we cannot // know if there's more input coming later. if (action != LZMA_FINISH) return LZMA_OK; // Stream Padding must be a multiple of // four bytes. return coder->pos == 0 ? LZMA_STREAM_END : LZMA_DATA_ERROR; } // If the byte is not zero, it probably indicates // beginning of a new Stream (or the file is corrupt). if (in[*in_pos] != 0x00) break; ++*in_pos; coder->pos = (coder->pos + 1) & 3; } // Stream Padding must be a multiple of four bytes (empty // Stream Padding is OK). 
if (coder->pos != 0) { ++*in_pos; return LZMA_DATA_ERROR; } // Prepare to decode the next Stream. return_if_error(stream_decoder_reset(coder, allocator)); break; default: assert(0); return LZMA_PROG_ERROR; } // Never reached } static void stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_stream_coder *coder = coder_ptr; lzma_next_end(&coder->block_decoder, allocator); lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_check stream_decoder_get_check(const void *coder_ptr) { const lzma_stream_coder *coder = coder_ptr; return coder->stream_flags.check; } static lzma_ret stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { lzma_stream_coder *coder = coder_ptr; *memusage = coder->memusage; *old_memlimit = coder->memlimit; if (new_memlimit != 0) { if (new_memlimit < coder->memusage) return LZMA_MEMLIMIT_ERROR; coder->memlimit = new_memlimit; } return LZMA_OK; } extern lzma_ret lzma_stream_decoder_init( lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator); if (flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; lzma_stream_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &stream_decode; next->end = &stream_decoder_end; next->get_check = &stream_decoder_get_check; next->memconfig = &stream_decoder_memconfig; coder->block_decoder = LZMA_NEXT_CODER_INIT; coder->index_hash = NULL; } coder->memlimit = my_max(1, memlimit); coder->memusage = LZMA_MEMUSAGE_BASE; coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0; coder->tell_unsupported_check = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; 
coder->concatenated = (flags & LZMA_CONCATENATED) != 0; coder->first_stream = true; return stream_decoder_reset(coder, allocator); } extern LZMA_API(lzma_ret) lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) { lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/src/liblzma/common/stream_decoder_mt.c b/src/liblzma/common/stream_decoder_mt.c index 5733c764892d..fd5cd7fd280a 100644 --- a/src/liblzma/common/stream_decoder_mt.c +++ b/src/liblzma/common/stream_decoder_mt.c @@ -1,2016 +1,2016 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_decoder_mt.c /// \brief Multithreaded .xz Stream decoder // // Authors: Sebastian Andrzej Siewior // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "common.h" #include "block_decoder.h" #include "stream_decoder.h" #include "index.h" #include "outqueue.h" typedef enum { /// Waiting for work. /// Main thread may change this to THR_RUN or THR_EXIT. THR_IDLE, /// Decoding is in progress. /// Main thread may change this to THR_STOP or THR_EXIT. /// The worker thread may change this to THR_IDLE. THR_RUN, /// The main thread wants the thread to stop whatever it was doing /// but not exit. Main thread may change this to THR_EXIT. /// The worker thread may change this to THR_IDLE. THR_STOP, /// The main thread wants the thread to exit. THR_EXIT, } worker_state; typedef enum { /// Partial updates (storing of worker thread progress /// to lzma_outbuf) are disabled. PARTIAL_DISABLED, /// Main thread requests partial updates to be enabled but /// no partial update has been done by the worker thread yet. 
/// /// Changing from PARTIAL_DISABLED to PARTIAL_START requires /// use of the worker-thread mutex. Other transitions don't /// need a mutex. PARTIAL_START, /// Partial updates are enabled and the worker thread has done /// at least one partial update. PARTIAL_ENABLED, } partial_update_mode; struct worker_thread { /// Worker state is protected with our mutex. worker_state state; /// Input buffer that will contain the whole Block except Block Header. uint8_t *in; /// Amount of memory allocated for "in" size_t in_size; /// Number of bytes written to "in" by the main thread size_t in_filled; /// Number of bytes consumed from "in" by the worker thread. size_t in_pos; /// Amount of uncompressed data that has been decoded. This local /// copy is needed because updating outbuf->pos requires locking /// the main mutex (coder->mutex). size_t out_pos; /// Pointer to the main structure is needed to (1) lock the main /// mutex (coder->mutex) when updating outbuf->pos and (2) when /// putting this thread back to the stack of free threads. struct lzma_stream_coder *coder; /// The allocator is set by the main thread. Since a copy of the /// pointer is kept here, the application must not change the /// allocator before calling lzma_end(). const lzma_allocator *allocator; /// Output queue buffer to which the uncompressed data is written. lzma_outbuf *outbuf; /// Amount of compressed data that has already been decompressed. /// This is updated from in_pos when our mutex is locked. /// This is size_t, not uint64_t, because per-thread progress /// is limited to sizes of allocated buffers. size_t progress_in; /// Like progress_in but for uncompressed data. size_t progress_out; /// Updating outbuf->pos requires locking the main mutex /// (coder->mutex). Since the main thread will only read output /// from the oldest outbuf in the queue, only the worker thread /// that is associated with the oldest outbuf needs to update its /// outbuf->pos. 
This avoids useless mutex contention that would /// happen if all worker threads were frequently locking the main /// mutex to update their outbuf->pos. /// /// Only when partial_update is something else than PARTIAL_DISABLED, /// this worker thread will update outbuf->pos after each call to /// the Block decoder. partial_update_mode partial_update; /// Block decoder lzma_next_coder block_decoder; /// Thread-specific Block options are needed because the Block /// decoder modifies the struct given to it at initialization. lzma_block block_options; /// Filter chain memory usage uint64_t mem_filters; /// Next structure in the stack of free worker threads. struct worker_thread *next; mythread_mutex mutex; mythread_cond cond; /// The ID of this thread is used to join the thread /// when it's not needed anymore. mythread thread_id; }; struct lzma_stream_coder { enum { SEQ_STREAM_HEADER, SEQ_BLOCK_HEADER, SEQ_BLOCK_INIT, SEQ_BLOCK_THR_INIT, SEQ_BLOCK_THR_RUN, SEQ_BLOCK_DIRECT_INIT, SEQ_BLOCK_DIRECT_RUN, SEQ_INDEX_WAIT_OUTPUT, SEQ_INDEX_DECODE, SEQ_STREAM_FOOTER, SEQ_STREAM_PADDING, SEQ_ERROR, } sequence; /// Block decoder lzma_next_coder block_decoder; /// Every Block Header will be decoded into this structure. /// This is also used to initialize a Block decoder when in /// direct mode. In threaded mode, a thread-specific copy will /// be made for decoder initialization because the Block decoder /// will modify the structure given to it. lzma_block block_options; /// Buffer to hold a filter chain for Block Header decoding and /// initialization. These are freed after successful Block decoder /// initialization or at stream_decoder_mt_end(). The thread-specific /// copy of block_options won't hold a pointer to filters[] after /// initialization. lzma_filter filters[LZMA_FILTERS_MAX + 1]; /// Stream Flags from Stream Header lzma_stream_flags stream_flags; /// Index is hashed so that it can be compared to the sizes of Blocks /// with O(1) memory usage. 
lzma_index_hash *index_hash; /// Maximum wait time if cannot use all the input and cannot /// fill the output buffer. This is in milliseconds. uint32_t timeout; /// Error code from a worker thread. /// /// \note Use mutex. lzma_ret thread_error; /// Error code to return after pending output has been copied out. If /// set in read_output_and_wait(), this is a mirror of thread_error. /// If set in stream_decode_mt() then it's, for example, error that /// occurred when decoding Block Header. lzma_ret pending_error; /// Number of threads that will be created at maximum. uint32_t threads_max; /// Number of thread structures that have been initialized from /// "threads", and thus the number of worker threads actually /// created so far. uint32_t threads_initialized; /// Array of allocated thread-specific structures. When no threads /// are in use (direct mode) this is NULL. In threaded mode this /// points to an array of threads_max number of worker_thread structs. struct worker_thread *threads; /// Stack of free threads. When a thread finishes, it puts itself /// back into this stack. This starts as empty because threads /// are created only when actually needed. /// /// \note Use mutex. struct worker_thread *threads_free; /// The most recent worker thread to which the main thread writes /// the new input from the application. struct worker_thread *thr; /// Output buffer queue for decompressed data from the worker threads /// /// \note Use mutex with operations that need it. lzma_outq outq; mythread_mutex mutex; mythread_cond cond; /// Memory usage that will not be exceeded in multi-threaded mode. /// Single-threaded mode can exceed this even by a large amount. uint64_t memlimit_threading; /// Memory usage limit that should never be exceeded. /// LZMA_MEMLIMIT_ERROR will be returned if decoding isn't possible /// even in single-threaded mode without exceeding this limit. 
uint64_t memlimit_stop; /// Amount of memory in use by the direct mode decoder /// (coder->block_decoder). In threaded mode this is 0. uint64_t mem_direct_mode; /// Amount of memory needed by the running worker threads. /// This doesn't include the memory needed by the output buffer. /// /// \note Use mutex. uint64_t mem_in_use; /// Amount of memory used by the idle (cached) threads. /// /// \note Use mutex. uint64_t mem_cached; /// Amount of memory needed for the filter chain of the next Block. uint64_t mem_next_filters; /// Amount of memory needed for the thread-specific input buffer /// for the next Block. uint64_t mem_next_in; /// Amount of memory actually needed to decode the next Block /// in threaded mode. This is /// mem_next_filters + mem_next_in + memory needed for lzma_outbuf. uint64_t mem_next_block; /// Amount of compressed data in Stream Header + Blocks that have /// already been finished. /// /// \note Use mutex. uint64_t progress_in; /// Amount of uncompressed data in Blocks that have already /// been finished. /// /// \note Use mutex. uint64_t progress_out; /// If true, LZMA_NO_CHECK is returned if the Stream has /// no integrity check. bool tell_no_check; /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has /// an integrity check that isn't supported by this liblzma build. bool tell_unsupported_check; /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. bool tell_any_check; /// If true, we will tell the Block decoder to skip calculating /// and verifying the integrity check. bool ignore_check; /// If true, we will decode concatenated Streams that possibly have /// Stream Padding between or after them. LZMA_STREAM_END is returned /// once the application isn't giving us any new input (LZMA_FINISH), /// and we aren't in the middle of a Stream, and possible /// Stream Padding is a multiple of four bytes. 
bool concatenated; /// If true, we will return any errors immediately instead of first /// producing all output before the location of the error. bool fail_fast; /// When decoding concatenated Streams, this is true as long as we /// are decoding the first Stream. This is needed to avoid misleading /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic /// bytes. bool first_stream; /// This is used to track if the previous call to stream_decode_mt() /// had output space (*out_pos < out_size) and managed to fill the /// output buffer (*out_pos == out_size). This may be set to true /// in read_output_and_wait(). This is read and then reset to false /// at the beginning of stream_decode_mt(). /// /// This is needed to support applications that call lzma_code() in /// such a way that more input is provided only when lzma_code() /// didn't fill the output buffer completely. Basically, this makes /// it easier to convert such applications from single-threaded /// decoder to multi-threaded decoder. bool out_was_filled; /// Write position in buffer[] and position in Stream Padding size_t pos; /// Buffer to hold Stream Header, Block Header, and Stream Footer. /// Block Header has biggest maximum size. uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; }; /// Enables updating of outbuf->pos. This is a callback function that is /// used with lzma_outq_enable_partial_output(). static void worker_enable_partial_update(void *thr_ptr) { struct worker_thread *thr = thr_ptr; mythread_sync(thr->mutex) { thr->partial_update = PARTIAL_START; mythread_cond_signal(&thr->cond); } } /// Things do to at THR_STOP or when finishing a Block. /// This is called with thr->mutex locked. static void worker_stop(struct worker_thread *thr) { // Update memory usage counters. thr->coder->mem_in_use -= thr->in_size; thr->in_size = 0; // thr->in was freed above. 
thr->coder->mem_in_use -= thr->mem_filters; thr->coder->mem_cached += thr->mem_filters; // Put this thread to the stack of free threads. thr->next = thr->coder->threads_free; thr->coder->threads_free = thr; mythread_cond_signal(&thr->coder->cond); return; } static MYTHREAD_RET_TYPE worker_decoder(void *thr_ptr) { struct worker_thread *thr = thr_ptr; size_t in_filled; partial_update_mode partial_update; lzma_ret ret; next_loop_lock: mythread_mutex_lock(&thr->mutex); next_loop_unlocked: if (thr->state == THR_IDLE) { mythread_cond_wait(&thr->cond, &thr->mutex); goto next_loop_unlocked; } if (thr->state == THR_EXIT) { mythread_mutex_unlock(&thr->mutex); lzma_free(thr->in, thr->allocator); lzma_next_end(&thr->block_decoder, thr->allocator); mythread_mutex_destroy(&thr->mutex); mythread_cond_destroy(&thr->cond); return MYTHREAD_RET_VALUE; } if (thr->state == THR_STOP) { thr->state = THR_IDLE; mythread_mutex_unlock(&thr->mutex); mythread_sync(thr->coder->mutex) { worker_stop(thr); } goto next_loop_lock; } assert(thr->state == THR_RUN); // Update progress info for get_progress(). thr->progress_in = thr->in_pos; thr->progress_out = thr->out_pos; // If we don't have any new input, wait for a signal from the main // thread except if partial output has just been enabled. In that // case we will do one normal run so that the partial output info // gets passed to the main thread. The call to block_decoder.code() // is useless but harmless as it can occur only once per Block. in_filled = thr->in_filled; partial_update = thr->partial_update; if (in_filled == thr->in_pos && partial_update != PARTIAL_START) { mythread_cond_wait(&thr->cond, &thr->mutex); goto next_loop_unlocked; } mythread_mutex_unlock(&thr->mutex); // Pass the input in small chunks to the Block decoder. // This way we react reasonably fast if we are told to stop/exit, // and (when partial update is enabled) we tell about our progress // to the main thread frequently enough. 
const size_t chunk_size = 16384; if ((in_filled - thr->in_pos) > chunk_size) in_filled = thr->in_pos + chunk_size; ret = thr->block_decoder.code( thr->block_decoder.coder, thr->allocator, thr->in, &thr->in_pos, in_filled, thr->outbuf->buf, &thr->out_pos, thr->outbuf->allocated, LZMA_RUN); if (ret == LZMA_OK) { if (partial_update != PARTIAL_DISABLED) { // The main thread uses thr->mutex to change from // PARTIAL_DISABLED to PARTIAL_START. The main thread // doesn't care about this variable after that so we // can safely change it here to PARTIAL_ENABLED // without a mutex. thr->partial_update = PARTIAL_ENABLED; // The main thread is reading decompressed data // from thr->outbuf. Tell the main thread about // our progress. // // NOTE: It's possible that we consumed input without // producing any new output so it's possible that // only in_pos has changed. In case of PARTIAL_START // it is possible that neither in_pos nor out_pos has // changed. mythread_sync(thr->coder->mutex) { thr->outbuf->pos = thr->out_pos; thr->outbuf->decoder_in_pos = thr->in_pos; mythread_cond_signal(&thr->coder->cond); } } goto next_loop_lock; } // Either we finished successfully (LZMA_STREAM_END) or an error // occurred. Both cases are handled almost identically. The error // case requires updating thr->coder->thread_error. // // The sizes are in the Block Header and the Block decoder // checks that they match, thus we know these: assert(ret != LZMA_STREAM_END || thr->in_pos == thr->in_size); assert(ret != LZMA_STREAM_END || thr->out_pos == thr->block_options.uncompressed_size); // Free the input buffer. Don't update in_size as we need // it later to update thr->coder->mem_in_use. lzma_free(thr->in, thr->allocator); thr->in = NULL; mythread_sync(thr->mutex) { if (thr->state != THR_EXIT) thr->state = THR_IDLE; } mythread_sync(thr->coder->mutex) { // Move our progress info to the main thread. 
thr->coder->progress_in += thr->in_pos; thr->coder->progress_out += thr->out_pos; thr->progress_in = 0; thr->progress_out = 0; // Mark the outbuf as finished. thr->outbuf->pos = thr->out_pos; thr->outbuf->decoder_in_pos = thr->in_pos; thr->outbuf->finished = true; thr->outbuf->finish_ret = ret; thr->outbuf = NULL; // If an error occurred, tell it to the main thread. if (ret != LZMA_STREAM_END && thr->coder->thread_error == LZMA_OK) thr->coder->thread_error = ret; worker_stop(thr); } goto next_loop_lock; } /// Tells the worker threads to exit and waits for them to terminate. static void threads_end(struct lzma_stream_coder *coder, const lzma_allocator *allocator) { for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { coder->threads[i].state = THR_EXIT; mythread_cond_signal(&coder->threads[i].cond); } } for (uint32_t i = 0; i < coder->threads_initialized; ++i) mythread_join(coder->threads[i].thread_id); lzma_free(coder->threads, allocator); coder->threads_initialized = 0; coder->threads = NULL; coder->threads_free = NULL; // The threads don't update these when they exit. Do it here. coder->mem_in_use = 0; coder->mem_cached = 0; return; } static void threads_stop(struct lzma_stream_coder *coder) { for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { // The state must be changed conditionally because // THR_IDLE -> THR_STOP is not a valid state change. if (coder->threads[i].state != THR_IDLE) { coder->threads[i].state = THR_STOP; mythread_cond_signal(&coder->threads[i].cond); } } } return; } /// Initialize a new worker_thread structure and create a new thread. static lzma_ret initialize_new_thread(struct lzma_stream_coder *coder, const lzma_allocator *allocator) { // Allocate the coder->threads array if needed. 
It's done here instead // of when initializing the decoder because we don't need this if we // use the direct mode (we may even free coder->threads in the middle // of the file if we switch from threaded to direct mode). if (coder->threads == NULL) { coder->threads = lzma_alloc( coder->threads_max * sizeof(struct worker_thread), allocator); if (coder->threads == NULL) return LZMA_MEM_ERROR; } // Pick a free structure. assert(coder->threads_initialized < coder->threads_max); struct worker_thread *thr = &coder->threads[coder->threads_initialized]; if (mythread_mutex_init(&thr->mutex)) goto error_mutex; if (mythread_cond_init(&thr->cond)) goto error_cond; thr->state = THR_IDLE; thr->in = NULL; thr->in_size = 0; thr->allocator = allocator; thr->coder = coder; thr->outbuf = NULL; thr->block_decoder = LZMA_NEXT_CODER_INIT; thr->mem_filters = 0; if (mythread_create(&thr->thread_id, worker_decoder, thr)) goto error_thread; ++coder->threads_initialized; coder->thr = thr; return LZMA_OK; error_thread: mythread_cond_destroy(&thr->cond); error_cond: mythread_mutex_destroy(&thr->mutex); error_mutex: return LZMA_MEM_ERROR; } static lzma_ret get_thread(struct lzma_stream_coder *coder, const lzma_allocator *allocator) { // If there is a free structure on the stack, use it. mythread_sync(coder->mutex) { if (coder->threads_free != NULL) { coder->thr = coder->threads_free; coder->threads_free = coder->threads_free->next; // The thread is no longer in the cache so substract // it from the cached memory usage. Don't add it // to mem_in_use though; the caller will handle it // since it knows how much memory it will actually // use (the filter chain might change). coder->mem_cached -= coder->thr->mem_filters; } } if (coder->thr == NULL) { assert(coder->threads_initialized < coder->threads_max); // Initialize a new thread. 
return_if_error(initialize_new_thread(coder, allocator)); } coder->thr->in_filled = 0; coder->thr->in_pos = 0; coder->thr->out_pos = 0; coder->thr->progress_in = 0; coder->thr->progress_out = 0; coder->thr->partial_update = PARTIAL_DISABLED; return LZMA_OK; } static lzma_ret read_output_and_wait(struct lzma_stream_coder *coder, const lzma_allocator *allocator, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, bool *input_is_possible, bool waiting_allowed, mythread_condtime *wait_abs, bool *has_blocked) { lzma_ret ret = LZMA_OK; mythread_sync(coder->mutex) { do { // Get as much output from the queue as is possible // without blocking. const size_t out_start = *out_pos; do { ret = lzma_outq_read(&coder->outq, allocator, out, out_pos, out_size, NULL, NULL); // If a Block was finished, tell the worker // thread of the next Block (if it is still // running) to start telling the main thread // when new output is available. if (ret == LZMA_STREAM_END) lzma_outq_enable_partial_output( &coder->outq, &worker_enable_partial_update); // Loop until a Block wasn't finished. // It's important to loop around even if // *out_pos == out_size because there could // be an empty Block that will return // LZMA_STREAM_END without needing any // output space. } while (ret == LZMA_STREAM_END); // Check if lzma_outq_read reported an error from // the Block decoder. if (ret != LZMA_OK) break; // If the output buffer is now full but it wasn't full // when this function was called, set out_was_filled. // This way the next call to stream_decode_mt() knows // that some output was produced and no output space // remained in the previous call to stream_decode_mt(). if (*out_pos == out_size && *out_pos != out_start) coder->out_was_filled = true; // Check if any thread has indicated an error. if (coder->thread_error != LZMA_OK) { // If LZMA_FAIL_FAST was used, report errors // from worker threads immediately. 
if (coder->fail_fast) { ret = coder->thread_error; break; } // Otherwise set pending_error. The value we // set here will not actually get used other // than working as a flag that an error has // occurred. This is because in SEQ_ERROR // all output before the error will be read // first by calling this function, and once we // reach the location of the (first) error the // error code from the above lzma_outq_read() // will be returned to the application. // // Use LZMA_PROG_ERROR since the value should // never leak to the application. It's // possible that pending_error has already // been set but that doesn't matter: if we get // here, pending_error only works as a flag. coder->pending_error = LZMA_PROG_ERROR; } // Check if decoding of the next Block can be started. // The memusage of the active threads must be low // enough, there must be a free buffer slot in the // output queue, and there must be a free thread // (that can be either created or an existing one // reused). // // NOTE: This is checked after reading the output // above because reading the output can free a slot in // the output queue and also reduce active memusage. // // NOTE: If output queue is empty, then input will // always be possible. if (input_is_possible != NULL && coder->memlimit_threading - coder->mem_in_use - coder->outq.mem_in_use >= coder->mem_next_block && lzma_outq_has_buf(&coder->outq) && (coder->threads_initialized < coder->threads_max || coder->threads_free != NULL)) { *input_is_possible = true; break; } // If the caller doesn't want us to block, return now. if (!waiting_allowed) break; // This check is needed only when input_is_possible // is NULL. We must return if we aren't waiting for // input to become possible and there is no more // output coming from the queue. 
if (lzma_outq_is_empty(&coder->outq)) { assert(input_is_possible == NULL); break; } // If there is more data available from the queue, // our out buffer must be full and we need to return // so that the application can provide more output // space. // // NOTE: In general lzma_outq_is_readable() can return // true also when there are no more bytes available. // This can happen when a Block has finished without // providing any new output. We know that this is not // the case because in the beginning of this loop we // tried to read as much as possible even when we had // no output space left and the mutex has been locked // all the time (so worker threads cannot have changed // anything). Thus there must be actual pending output // in the queue. if (lzma_outq_is_readable(&coder->outq)) { assert(*out_pos == out_size); break; } // If the application stops providing more input // in the middle of a Block, there will eventually // be one worker thread left that is stuck waiting for // more input (that might never arrive) and a matching // outbuf which the worker thread cannot finish due // to lack of input. We must detect this situation, // otherwise we would end up waiting indefinitely // (if no timeout is in use) or keep returning // LZMA_TIMED_OUT while making no progress. Thus, the // application would never get LZMA_BUF_ERROR from // lzma_code() which would tell the application that // no more progress is possible. No LZMA_BUF_ERROR // means that, for example, truncated .xz files could // cause an infinite loop. // // A worker thread doing partial updates will // store not only the output position in outbuf->pos // but also the matching input position in // outbuf->decoder_in_pos. Here we check if that // input position matches the amount of input that // the worker thread has been given (in_filled). // If so, we must return and not wait as no more // output will be coming without first getting more // input to the worker thread. 
If the application // keeps calling lzma_code() without providing more // input, it will eventually get LZMA_BUF_ERROR. // // NOTE: We can read partial_update and in_filled // without thr->mutex as only the main thread // modifies these variables. decoder_in_pos requires // coder->mutex which we are already holding. if (coder->thr != NULL && coder->thr->partial_update != PARTIAL_DISABLED) { // There is exactly one outbuf in the queue. assert(coder->thr->outbuf == coder->outq.head); assert(coder->thr->outbuf == coder->outq.tail); if (coder->thr->outbuf->decoder_in_pos == coder->thr->in_filled) break; } // Wait for input or output to become possible. if (coder->timeout != 0) { // See the comment in stream_encoder_mt.c // about why mythread_condtime_set() is used // like this. // // FIXME? // In contrast to the encoder, this calls // _condtime_set while the mutex is locked. if (!*has_blocked) { *has_blocked = true; mythread_condtime_set(wait_abs, &coder->cond, coder->timeout); } if (mythread_cond_timedwait(&coder->cond, &coder->mutex, wait_abs) != 0) { ret = LZMA_TIMED_OUT; break; } } else { mythread_cond_wait(&coder->cond, &coder->mutex); } } while (ret == LZMA_OK); } // If we are returning an error, then the application cannot get // more output from us and thus keeping the threads running is // useless and waste of CPU time. if (ret != LZMA_OK && ret != LZMA_TIMED_OUT) threads_stop(coder); return ret; } static lzma_ret decode_block_header(struct lzma_stream_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { if (*in_pos >= in_size) return LZMA_OK; if (coder->pos == 0) { // Detect if it's Index. - if (in[*in_pos] == 0x00) + if (in[*in_pos] == INDEX_INDICATOR) return LZMA_INDEX_DETECTED; // Calculate the size of the Block Header. Note that // Block Header decoder wants to see this byte too // so don't advance *in_pos. 
coder->block_options.header_size = lzma_block_header_size_decode( in[*in_pos]); } // Copy the Block Header to the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, coder->block_options.header_size); // Return if we didn't get the whole Block Header yet. if (coder->pos < coder->block_options.header_size) return LZMA_OK; coder->pos = 0; // Version 1 is needed to support the .ignore_check option. coder->block_options.version = 1; // Block Header decoder will initialize all members of this array // so we don't need to do it here. coder->block_options.filters = coder->filters; // Decode the Block Header. return_if_error(lzma_block_header_decode(&coder->block_options, allocator, coder->buffer)); // If LZMA_IGNORE_CHECK was used, this flag needs to be set. // It has to be set after lzma_block_header_decode() because // it always resets this to false. coder->block_options.ignore_check = coder->ignore_check; // coder->block_options is ready now. return LZMA_STREAM_END; } /// Get the size of the Compressed Data + Block Padding + Check. static size_t comp_blk_size(const struct lzma_stream_coder *coder) { return vli_ceil4(coder->block_options.compressed_size) + lzma_check_size(coder->stream_flags.check); } /// Returns true if the size (compressed or uncompressed) is such that /// threaded decompression cannot be used. Sizes that are too big compared /// to SIZE_MAX must be rejected to avoid integer overflows and truncations /// when lzma_vli is assigned to a size_t. static bool is_direct_mode_needed(lzma_vli size) { return size == LZMA_VLI_UNKNOWN || size > SIZE_MAX / 3; } static lzma_ret stream_decoder_reset(struct lzma_stream_coder *coder, const lzma_allocator *allocator) { // Initialize the Index hash used to verify the Index. coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); if (coder->index_hash == NULL) return LZMA_MEM_ERROR; // Reset the rest of the variables. 
coder->sequence = SEQ_STREAM_HEADER; coder->pos = 0; return LZMA_OK; } static lzma_ret stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { struct lzma_stream_coder *coder = coder_ptr; mythread_condtime wait_abs; bool has_blocked = false; // Determine if in SEQ_BLOCK_HEADER and SEQ_BLOCK_THR_RUN we should // tell read_output_and_wait() to wait until it can fill the output // buffer (or a timeout occurs). Two conditions must be met: // // (1) If the caller provided no new input. The reason for this // can be, for example, the end of the file or that there is // a pause in the input stream and more input is available // a little later. In this situation we should wait for output // because otherwise we would end up in a busy-waiting loop where // we make no progress and the application just calls us again // without providing any new input. This would then result in // LZMA_BUF_ERROR even though more output would be available // once the worker threads decode more data. // // (2) Even if (1) is true, we will not wait if the previous call to // this function managed to produce some output and the output // buffer became full. This is for compatibility with applications // that call lzma_code() in such a way that new input is provided // only when the output buffer didn't become full. Without this // trick such applications would have bad performance (bad // parallelization due to decoder not getting input fast enough). // // NOTE: Such loops might require that timeout is disabled (0) // if they assume that output-not-full implies that all input has // been consumed. If and only if timeout is enabled, we may return // when output isn't full *and* not all input has been consumed. 
// // However, if LZMA_FINISH is used, the above is ignored and we always // wait (timeout can still cause us to return) because we know that // we won't get any more input. This matters if the input file is // truncated and we are doing single-shot decoding, that is, // timeout = 0 and LZMA_FINISH is used on the first call to // lzma_code() and the output buffer is known to be big enough // to hold all uncompressed data: // // - If LZMA_FINISH wasn't handled specially, we could return // LZMA_OK before providing all output that is possible with the // truncated input. The rest would be available if lzma_code() was // called again but then it's not single-shot decoding anymore. // // - By handling LZMA_FINISH specially here, the first call will // produce all the output, matching the behavior of the // single-threaded decoder. // // So it's a very specific corner case but also easy to avoid. Note // that this special handling of LZMA_FINISH has no effect for // single-shot decoding when the input file is valid (not truncated); // premature LZMA_OK wouldn't be possible as long as timeout = 0. const bool waiting_allowed = action == LZMA_FINISH || (*in_pos == in_size && !coder->out_was_filled); coder->out_was_filled = false; while (true) switch (coder->sequence) { case SEQ_STREAM_HEADER: { // Copy the Stream Header to the internal buffer. const size_t in_old = *in_pos; lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); coder->progress_in += *in_pos - in_old; // Return if we didn't get the whole Stream Header yet. if (coder->pos < LZMA_STREAM_HEADER_SIZE) return LZMA_OK; coder->pos = 0; // Decode the Stream Header. const lzma_ret ret = lzma_stream_header_decode( &coder->stream_flags, coder->buffer); if (ret != LZMA_OK) return ret == LZMA_FORMAT_ERROR && !coder->first_stream ? 
LZMA_DATA_ERROR : ret; // If we are decoding concatenated Streams, and the later // Streams have invalid Header Magic Bytes, we give // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. coder->first_stream = false; // Copy the type of the Check so that Block Header and Block // decoders see it. coder->block_options.check = coder->stream_flags.check; // Even if we return LZMA_*_CHECK below, we want // to continue from Block Header decoding. coder->sequence = SEQ_BLOCK_HEADER; // Detect if there's no integrity check or if it is // unsupported if those were requested by the application. if (coder->tell_no_check && coder->stream_flags.check == LZMA_CHECK_NONE) return LZMA_NO_CHECK; if (coder->tell_unsupported_check && !lzma_check_is_supported( coder->stream_flags.check)) return LZMA_UNSUPPORTED_CHECK; if (coder->tell_any_check) return LZMA_GET_CHECK; } // Fall through case SEQ_BLOCK_HEADER: { const size_t in_old = *in_pos; const lzma_ret ret = decode_block_header(coder, allocator, in, in_pos, in_size); coder->progress_in += *in_pos - in_old; if (ret == LZMA_OK) { // We didn't decode the whole Block Header yet. // // Read output from the queue before returning. This // is important because it is possible that the // application doesn't have any new input available // immediately. If we didn't try to copy output from // the output queue here, lzma_code() could end up // returning LZMA_BUF_ERROR even though queued output // is available. // // If the lzma_code() call provided at least one input // byte, only copy as much data from the output queue // as is available immediately. This way the // application will be able to provide more input // without a delay. // // On the other hand, if lzma_code() was called with // an empty input buffer(*), treat it specially: try // to fill the output buffer even if it requires // waiting for the worker threads to provide output // (timeout, if specified, can still cause us to // return). 
// // - This way the application will be able to get all // data that can be decoded from the input provided // so far. // // - We avoid both premature LZMA_BUF_ERROR and // busy-waiting where the application repeatedly // calls lzma_code() which immediately returns // LZMA_OK without providing new data. // // - If the queue becomes empty, we won't wait // anything and will return LZMA_OK immediately // (coder->timeout is completely ignored). // // (*) See the comment at the beginning of this // function how waiting_allowed is determined // and why there is an exception to the rule // of "called with an empty input buffer". assert(*in_pos == in_size); // If LZMA_FINISH was used we know that we won't get // more input, so the file must be truncated if we // get here. If worker threads don't detect any // errors, eventually there will be no more output // while we keep returning LZMA_OK which gets // converted to LZMA_BUF_ERROR in lzma_code(). // // If fail-fast is enabled then we will return // immediately using LZMA_DATA_ERROR instead of // LZMA_OK or LZMA_BUF_ERROR. Rationale for the // error code: // // - Worker threads may have a large amount of // not-yet-decoded input data and we don't // know for sure if all data is valid. Bad // data there would result in LZMA_DATA_ERROR // when fail-fast isn't used. // // - Immediate LZMA_BUF_ERROR would be a bit weird // considering the older liblzma code. lzma_code() // even has an assertion to prevent coders from // returning LZMA_BUF_ERROR directly. // // The downside of this is that with fail-fast apps // cannot always distinguish between corrupt and // truncated files. if (action == LZMA_FINISH && coder->fail_fast) { // We won't produce any more output. Stop // the unfinished worker threads so they // won't waste CPU time. threads_stop(coder); return LZMA_DATA_ERROR; } // read_output_and_wait() will call threads_stop() // if needed so with that we can use return_if_error. 
return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, waiting_allowed, &wait_abs, &has_blocked)); if (coder->pending_error != LZMA_OK) { coder->sequence = SEQ_ERROR; break; } return LZMA_OK; } if (ret == LZMA_INDEX_DETECTED) { coder->sequence = SEQ_INDEX_WAIT_OUTPUT; break; } // See if an error occurred. if (ret != LZMA_STREAM_END) { // NOTE: Here and in all other places where // pending_error is set, it may overwrite the value // (LZMA_PROG_ERROR) set by read_output_and_wait(). // That function might overwrite value set here too. // These are fine because when read_output_and_wait() // sets pending_error, it actually works as a flag // variable only ("some error has occurred") and the // actual value of pending_error is not used in // SEQ_ERROR. In such cases SEQ_ERROR will eventually // get the correct error code from the return value of // a later read_output_and_wait() call. coder->pending_error = ret; coder->sequence = SEQ_ERROR; break; } // Calculate the memory usage of the filters / Block decoder. coder->mem_next_filters = lzma_raw_decoder_memusage( coder->filters); if (coder->mem_next_filters == UINT64_MAX) { // One or more unknown Filter IDs. coder->pending_error = LZMA_OPTIONS_ERROR; coder->sequence = SEQ_ERROR; break; } coder->sequence = SEQ_BLOCK_INIT; } // Fall through case SEQ_BLOCK_INIT: { // Check if decoding is possible at all with the current // memlimit_stop which we must never exceed. // // This needs to be the first thing in SEQ_BLOCK_INIT // to make it possible to restart decoding after increasing // memlimit_stop with lzma_memlimit_set(). if (coder->mem_next_filters > coder->memlimit_stop) { // Flush pending output before returning // LZMA_MEMLIMIT_ERROR. If the application doesn't // want to increase the limit, at least it will get // all the output possible so far. 
return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, true, &wait_abs, &has_blocked)); if (!lzma_outq_is_empty(&coder->outq)) return LZMA_OK; return LZMA_MEMLIMIT_ERROR; } // Check if the size information is available in Block Header. // If it is, check if the sizes are small enough that we don't // need to worry *too* much about integer overflows later in // the code. If these conditions are not met, we must use the // single-threaded direct mode. if (is_direct_mode_needed(coder->block_options.compressed_size) || is_direct_mode_needed( coder->block_options.uncompressed_size)) { coder->sequence = SEQ_BLOCK_DIRECT_INIT; break; } // Calculate the amount of memory needed for the input and // output buffers in threaded mode. // // These cannot overflow because we already checked that // the sizes are small enough using is_direct_mode_needed(). coder->mem_next_in = comp_blk_size(coder); const uint64_t mem_buffers = coder->mem_next_in + lzma_outq_outbuf_memusage( coder->block_options.uncompressed_size); // Add the amount needed by the filters. // Avoid integer overflows. if (UINT64_MAX - mem_buffers < coder->mem_next_filters) { // Use direct mode if the memusage would overflow. // This is a theoretical case that shouldn't happen // in practice unless the input file is weird (broken // or malicious). coder->sequence = SEQ_BLOCK_DIRECT_INIT; break; } // Amount of memory needed to decode this Block in // threaded mode: coder->mem_next_block = coder->mem_next_filters + mem_buffers; // If this alone would exceed memlimit_threading, then we must // use the single-threaded direct mode. if (coder->mem_next_block > coder->memlimit_threading) { coder->sequence = SEQ_BLOCK_DIRECT_INIT; break; } // Use the threaded mode. Free the direct mode decoder in // case it has been initialized. 
lzma_next_end(&coder->block_decoder, allocator); coder->mem_direct_mode = 0; // Since we already know what the sizes are supposed to be, // we can already add them to the Index hash. The Block // decoder will verify the values while decoding. const lzma_ret ret = lzma_index_hash_append(coder->index_hash, lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size); if (ret != LZMA_OK) { coder->pending_error = ret; coder->sequence = SEQ_ERROR; break; } coder->sequence = SEQ_BLOCK_THR_INIT; } // Fall through case SEQ_BLOCK_THR_INIT: { // We need to wait for multiple conditions to become true // before we can initialize the Block decoder and let a worker // thread decode it: // // - Wait for the memory usage of the active threads to drop // so that starting the decoding of this Block won't make // us go over memlimit_threading. // // - Wait for at least one free output queue slot. // // - Wait for a free worker thread. // // While we wait, we must copy decompressed data to the out // buffer and catch possible decoder errors. // // read_output_and_wait() does all the above. bool block_can_start = false; return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, &block_can_start, true, &wait_abs, &has_blocked)); if (coder->pending_error != LZMA_OK) { coder->sequence = SEQ_ERROR; break; } if (!block_can_start) { // It's not a timeout because return_if_error handles // it already. Output queue cannot be empty either // because in that case block_can_start would have // been true. Thus the output buffer must be full and // the queue isn't empty. assert(*out_pos == out_size); assert(!lzma_outq_is_empty(&coder->outq)); return LZMA_OK; } // We know that we can start decoding this Block without // exceeding memlimit_threading. However, to stay below // memlimit_threading may require freeing some of the // cached memory. // // Get a local copy of variables that require locking the // mutex.
It is fine if the worker threads modify the real // values after we read these as those changes can only be // towards more favorable conditions (less memory in use, // more in cache). uint64_t mem_in_use; uint64_t mem_cached; struct worker_thread *thr = NULL; // Init to silence warning. mythread_sync(coder->mutex) { mem_in_use = coder->mem_in_use; mem_cached = coder->mem_cached; thr = coder->threads_free; } // The maximum amount of memory that can be held by other // threads and cached buffers while allowing us to start // decoding the next Block. const uint64_t mem_max = coder->memlimit_threading - coder->mem_next_block; // If the existing allocations are so large that starting // to decode this Block might exceed memlimit_threading, // try to free memory from the output queue cache first. // // NOTE: This math assumes the worst case. It's possible // that the limit wouldn't be exceeded if the existing cached // allocations are reused. if (mem_in_use + mem_cached + coder->outq.mem_allocated > mem_max) { // Clear the outq cache except leave one buffer in // the cache if its size is correct. That way we // don't free and almost immediately reallocate // an identical buffer. lzma_outq_clear_cache2(&coder->outq, allocator, coder->block_options.uncompressed_size); } // If there is at least one worker_thread in the cache and // the existing allocations are so large that starting to // decode this Block might exceed memlimit_threading, free // memory by freeing cached Block decoders. // // NOTE: The comparison is different here than above. // Here we don't care about cached buffers in outq anymore // and only look at memory actually in use. This is because // if there is something in outq cache, it's a single buffer // that can be used as is. We ensured this in the above // if-block.
uint64_t mem_freed = 0; if (thr != NULL && mem_in_use + mem_cached + coder->outq.mem_in_use > mem_max) { // Don't free the first Block decoder if its memory // usage isn't greater than what this Block will need. // Typically the same filter chain is used for all // Blocks so this way the allocations can be reused // when get_thread() picks the first worker_thread // from the cache. if (thr->mem_filters <= coder->mem_next_filters) thr = thr->next; while (thr != NULL) { lzma_next_end(&thr->block_decoder, allocator); mem_freed += thr->mem_filters; thr->mem_filters = 0; thr = thr->next; } } // Update the memory usage counters. Note that coder->mem_* // may have changed since we read them so we must subtract // or add the changes. mythread_sync(coder->mutex) { coder->mem_cached -= mem_freed; // Memory needed for the filters and the input buffer. // The output queue takes care of its own counter so // we don't touch it here. // // NOTE: After this, coder->mem_in_use + // coder->mem_cached might count the same thing twice. // If so, this will get corrected in get_thread() when // a worker_thread is picked from coder->threads_free // and its memory usage is subtracted from mem_cached. coder->mem_in_use += coder->mem_next_in + coder->mem_next_filters; } // Allocate memory for the output buffer in the output queue. lzma_ret ret = lzma_outq_prealloc_buf( &coder->outq, allocator, coder->block_options.uncompressed_size); if (ret != LZMA_OK) { threads_stop(coder); return ret; } // Set up coder->thr. ret = get_thread(coder, allocator); if (ret != LZMA_OK) { threads_stop(coder); return ret; } // The new Block decoder memory usage is already counted in // coder->mem_in_use. Store it in the thread too. coder->thr->mem_filters = coder->mem_next_filters; // Initialize the Block decoder.
coder->thr->block_options = coder->block_options; ret = lzma_block_decoder_init( &coder->thr->block_decoder, allocator, &coder->thr->block_options); // Free the allocated filter options since they are needed // only to initialize the Block decoder. lzma_filters_free(coder->filters, allocator); coder->thr->block_options.filters = NULL; // Check if memory usage calculation and Block decoder // initialization succeeded. if (ret != LZMA_OK) { coder->pending_error = ret; coder->sequence = SEQ_ERROR; break; } // Allocate the input buffer. coder->thr->in_size = coder->mem_next_in; coder->thr->in = lzma_alloc(coder->thr->in_size, allocator); if (coder->thr->in == NULL) { threads_stop(coder); return LZMA_MEM_ERROR; } // Get the preallocated output buffer. coder->thr->outbuf = lzma_outq_get_buf( &coder->outq, coder->thr); // Start the decoder. mythread_sync(coder->thr->mutex) { assert(coder->thr->state == THR_IDLE); coder->thr->state = THR_RUN; mythread_cond_signal(&coder->thr->cond); } // Enable output from the thread that holds the oldest output // buffer in the output queue (if such a thread exists). mythread_sync(coder->mutex) { lzma_outq_enable_partial_output(&coder->outq, &worker_enable_partial_update); } coder->sequence = SEQ_BLOCK_THR_RUN; } // Fall through case SEQ_BLOCK_THR_RUN: { if (action == LZMA_FINISH && coder->fail_fast) { // We know that we won't get more input and that // the caller wants fail-fast behavior. If we see // that we don't have enough input to finish this // Block, return LZMA_DATA_ERROR immediately. // See SEQ_BLOCK_HEADER for the error code rationale. const size_t in_avail = in_size - *in_pos; const size_t in_needed = coder->thr->in_size - coder->thr->in_filled; if (in_avail < in_needed) { threads_stop(coder); return LZMA_DATA_ERROR; } } // Copy input to the worker thread. size_t cur_in_filled = coder->thr->in_filled; lzma_bufcpy(in, in_pos, in_size, coder->thr->in, &cur_in_filled, coder->thr->in_size); // Tell the thread how much we copied.
mythread_sync(coder->thr->mutex) { coder->thr->in_filled = cur_in_filled; // NOTE: Most of the time we are copying input faster // than the thread can decode so most of the time // calling mythread_cond_signal() is useless but // we cannot make it conditional because thr->in_pos // is updated without a mutex. And the overhead should // be very much negligible anyway. mythread_cond_signal(&coder->thr->cond); } // Read output from the output queue. Just like in // SEQ_BLOCK_HEADER, we wait to fill the output buffer // only if waiting_allowed was set to true in the beginning // of this function (see the comment there). return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, waiting_allowed, &wait_abs, &has_blocked)); if (coder->pending_error != LZMA_OK) { coder->sequence = SEQ_ERROR; break; } // Return if the input didn't contain the whole Block. if (coder->thr->in_filled < coder->thr->in_size) { assert(*in_pos == in_size); return LZMA_OK; } // The whole Block has been copied to the thread-specific // buffer. Continue from the next Block Header or Index. coder->thr = NULL; coder->sequence = SEQ_BLOCK_HEADER; break; } case SEQ_BLOCK_DIRECT_INIT: { // Wait for the threads to finish and that all decoded data // has been copied to the output. That is, wait until the // output queue becomes empty. // // NOTE: No need to check for coder->pending_error as // we aren't consuming any input until the queue is empty // and if there is a pending error, read_output_and_wait() // will eventually return it before the queue is empty. return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, true, &wait_abs, &has_blocked)); if (!lzma_outq_is_empty(&coder->outq)) return LZMA_OK; // Free the cached output buffers. lzma_outq_clear_cache(&coder->outq, allocator); // Get rid of the worker threads, including the coder->threads // array. threads_end(coder, allocator); // Initialize the Block decoder. 
const lzma_ret ret = lzma_block_decoder_init( &coder->block_decoder, allocator, &coder->block_options); // Free the allocated filter options since they are needed // only to initialize the Block decoder. lzma_filters_free(coder->filters, allocator); coder->block_options.filters = NULL; // Check if Block decoder initialization succeeded. if (ret != LZMA_OK) return ret; // Make the memory usage visible to _memconfig(). coder->mem_direct_mode = coder->mem_next_filters; coder->sequence = SEQ_BLOCK_DIRECT_RUN; } // Fall through case SEQ_BLOCK_DIRECT_RUN: { const size_t in_old = *in_pos; const size_t out_old = *out_pos; const lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); coder->progress_in += *in_pos - in_old; coder->progress_out += *out_pos - out_old; if (ret != LZMA_STREAM_END) return ret; // Block decoded successfully. Add the new size pair to // the Index hash. return_if_error(lzma_index_hash_append(coder->index_hash, lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_HEADER; break; } case SEQ_INDEX_WAIT_OUTPUT: // Flush the output from all worker threads so that we can // decode the Index without thinking about threading. return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, true, &wait_abs, &has_blocked)); if (!lzma_outq_is_empty(&coder->outq)) return LZMA_OK; coder->sequence = SEQ_INDEX_DECODE; // Fall through case SEQ_INDEX_DECODE: { // If we don't have any input, don't call // lzma_index_hash_decode() since it would return // LZMA_BUF_ERROR, which we must not do here. if (*in_pos >= in_size) return LZMA_OK; // Decode the Index and compare it to the hash calculated // from the sizes of the Blocks (if any). 
const size_t in_old = *in_pos; const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, in, in_pos, in_size); coder->progress_in += *in_pos - in_old; if (ret != LZMA_STREAM_END) return ret; coder->sequence = SEQ_STREAM_FOOTER; } // Fall through case SEQ_STREAM_FOOTER: { // Copy the Stream Footer to the internal buffer. const size_t in_old = *in_pos; lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); coder->progress_in += *in_pos - in_old; // Return if we didn't get the whole Stream Footer yet. if (coder->pos < LZMA_STREAM_HEADER_SIZE) return LZMA_OK; coder->pos = 0; // Decode the Stream Footer. The decoder gives // LZMA_FORMAT_ERROR if the magic bytes don't match, // so convert that return code to LZMA_DATA_ERROR. lzma_stream_flags footer_flags; const lzma_ret ret = lzma_stream_footer_decode( &footer_flags, coder->buffer); if (ret != LZMA_OK) return ret == LZMA_FORMAT_ERROR ? LZMA_DATA_ERROR : ret; // Check that Index Size stored in the Stream Footer matches // the real size of the Index field. if (lzma_index_hash_size(coder->index_hash) != footer_flags.backward_size) return LZMA_DATA_ERROR; // Compare that the Stream Flags fields are identical in // both Stream Header and Stream Footer. return_if_error(lzma_stream_flags_compare( &coder->stream_flags, &footer_flags)); if (!coder->concatenated) return LZMA_STREAM_END; coder->sequence = SEQ_STREAM_PADDING; } // Fall through case SEQ_STREAM_PADDING: assert(coder->concatenated); // Skip over possible Stream Padding. while (true) { if (*in_pos >= in_size) { // Unless LZMA_FINISH was used, we cannot // know if there's more input coming later. if (action != LZMA_FINISH) return LZMA_OK; // Stream Padding must be a multiple of // four bytes. return coder->pos == 0 ? LZMA_STREAM_END : LZMA_DATA_ERROR; } // If the byte is not zero, it probably indicates // beginning of a new Stream (or the file is corrupt). 
if (in[*in_pos] != 0x00) break; ++*in_pos; ++coder->progress_in; coder->pos = (coder->pos + 1) & 3; } // Stream Padding must be a multiple of four bytes (empty // Stream Padding is OK). if (coder->pos != 0) { ++*in_pos; ++coder->progress_in; return LZMA_DATA_ERROR; } // Prepare to decode the next Stream. return_if_error(stream_decoder_reset(coder, allocator)); break; case SEQ_ERROR: if (!coder->fail_fast) { // Let the application get all data before the point // where the error was detected. This matches the // behavior of single-threaded use. // // FIXME? Some errors (LZMA_MEM_ERROR) don't get here, // they are returned immediately. Thus in rare cases // the output will be less than in the single-threaded // mode. Maybe this doesn't matter much in practice. return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, true, &wait_abs, &has_blocked)); // We get here only if the error happened in the main // thread, for example, unsupported Block Header. if (!lzma_outq_is_empty(&coder->outq)) return LZMA_OK; } // We only get here if no errors were detected by the worker // threads. Errors from worker threads would have already been // returned by the call to read_output_and_wait() above. 
return coder->pending_error; default: assert(0); return LZMA_PROG_ERROR; } // Never reached } static void stream_decoder_mt_end(void *coder_ptr, const lzma_allocator *allocator) { struct lzma_stream_coder *coder = coder_ptr; threads_end(coder, allocator); lzma_outq_end(&coder->outq, allocator); lzma_next_end(&coder->block_decoder, allocator); lzma_filters_free(coder->filters, allocator); lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_check stream_decoder_mt_get_check(const void *coder_ptr) { const struct lzma_stream_coder *coder = coder_ptr; return coder->stream_flags.check; } static lzma_ret stream_decoder_mt_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { // NOTE: This function gets/sets memlimit_stop. For now, // memlimit_threading cannot be modified after initialization. // // *memusage will include cached memory too. Excluding cached memory // would be misleading and it wouldn't help the applications to // know how much memory is actually needed to decompress the file // because the higher the number of threads and the memlimits are // the more memory the decoder may use. // // Setting a new limit includes the cached memory too and too low // limits will be rejected. Alternative could be to free the cached // memory immediately if that helps to bring the limit down but // the current way is the simplest. It's unlikely that limit needs // to be lowered in the middle of a file anyway; the typical reason // to want a new limit is to increase after LZMA_MEMLIMIT_ERROR // and even such use isn't common. struct lzma_stream_coder *coder = coder_ptr; mythread_sync(coder->mutex) { *memusage = coder->mem_direct_mode + coder->mem_in_use + coder->mem_cached + coder->outq.mem_allocated; } // If no filter chains are allocated, *memusage may be zero. // Always return at least LZMA_MEMUSAGE_BASE. 
if (*memusage < LZMA_MEMUSAGE_BASE) *memusage = LZMA_MEMUSAGE_BASE; *old_memlimit = coder->memlimit_stop; if (new_memlimit != 0) { if (new_memlimit < *memusage) return LZMA_MEMLIMIT_ERROR; coder->memlimit_stop = new_memlimit; } return LZMA_OK; } static void stream_decoder_mt_get_progress(void *coder_ptr, uint64_t *progress_in, uint64_t *progress_out) { struct lzma_stream_coder *coder = coder_ptr; // Lock coder->mutex to prevent finishing threads from moving their // progress info from the worker_thread structure to lzma_stream_coder. mythread_sync(coder->mutex) { *progress_in = coder->progress_in; *progress_out = coder->progress_out; for (size_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { *progress_in += coder->threads[i].progress_in; *progress_out += coder->threads[i] .progress_out; } } } return; } static lzma_ret stream_decoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_mt *options) { struct lzma_stream_coder *coder; if (options->threads == 0 || options->threads > LZMA_THREADS_MAX) return LZMA_OPTIONS_ERROR; if (options->flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; lzma_next_coder_init(&stream_decoder_mt_init, next, allocator); coder = next->coder; if (!coder) { coder = lzma_alloc(sizeof(struct lzma_stream_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; if (mythread_mutex_init(&coder->mutex)) { lzma_free(coder, allocator); return LZMA_MEM_ERROR; } if (mythread_cond_init(&coder->cond)) { mythread_mutex_destroy(&coder->mutex); lzma_free(coder, allocator); return LZMA_MEM_ERROR; } next->code = &stream_decode_mt; next->end = &stream_decoder_mt_end; next->get_check = &stream_decoder_mt_get_check; next->memconfig = &stream_decoder_mt_memconfig; next->get_progress = &stream_decoder_mt_get_progress; coder->filters[0].id = LZMA_VLI_UNKNOWN; memzero(&coder->outq, sizeof(coder->outq)); coder->block_decoder = LZMA_NEXT_CODER_INIT; coder->mem_direct_mode 
= 0; coder->index_hash = NULL; coder->threads = NULL; coder->threads_free = NULL; coder->threads_initialized = 0; } // Cleanup old filter chain if one remains after unfinished decoding // of a previous Stream. lzma_filters_free(coder->filters, allocator); // By allocating threads from scratch we can start memory-usage // accounting from scratch, too. Changes in filter and block sizes may // affect number of threads. // // FIXME? Reusing should be easy but unlike the single-threaded // decoder, with some types of input file combinations reusing // could leave quite a lot of memory allocated but unused (first // file could allocate a lot, the next files could use fewer // threads and some of the allocations from the first file would not // get freed unless memlimit_threading forces us to clear caches). // // NOTE: The direct mode decoder isn't freed here if one exists. // It will be reused or freed as needed in the main loop. threads_end(coder, allocator); // All memusage counters start at 0 (including mem_direct_mode). // The little extra that is needed for the structs in this file // get accounted well enough by the filter chain memory usage // which adds LZMA_MEMUSAGE_BASE for each chain. However, // stream_decoder_mt_memconfig() has to handle this specially so that // it will never return less than LZMA_MEMUSAGE_BASE as memory usage. 
coder->mem_in_use = 0; coder->mem_cached = 0; coder->mem_next_block = 0; coder->progress_in = 0; coder->progress_out = 0; coder->sequence = SEQ_STREAM_HEADER; coder->thread_error = LZMA_OK; coder->pending_error = LZMA_OK; coder->thr = NULL; coder->timeout = options->timeout; coder->memlimit_threading = my_max(1, options->memlimit_threading); coder->memlimit_stop = my_max(1, options->memlimit_stop); if (coder->memlimit_threading > coder->memlimit_stop) coder->memlimit_threading = coder->memlimit_stop; coder->tell_no_check = (options->flags & LZMA_TELL_NO_CHECK) != 0; coder->tell_unsupported_check = (options->flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; coder->tell_any_check = (options->flags & LZMA_TELL_ANY_CHECK) != 0; coder->ignore_check = (options->flags & LZMA_IGNORE_CHECK) != 0; coder->concatenated = (options->flags & LZMA_CONCATENATED) != 0; coder->fail_fast = (options->flags & LZMA_FAIL_FAST) != 0; coder->first_stream = true; coder->out_was_filled = false; coder->pos = 0; coder->threads_max = options->threads; return_if_error(lzma_outq_init(&coder->outq, allocator, coder->threads_max)); return stream_decoder_reset(coder, allocator); } extern LZMA_API(lzma_ret) lzma_stream_decoder_mt(lzma_stream *strm, const lzma_mt *options) { lzma_next_strm_init(stream_decoder_mt_init, strm, options); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/src/liblzma/simple/arm64.c b/src/liblzma/simple/arm64.c index 5e7f26562d5a..0fe0824eb931 100644 --- a/src/liblzma/simple/arm64.c +++ b/src/liblzma/simple/arm64.c @@ -1,136 +1,137 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file arm64.c /// \brief Filter for ARM64 binaries /// /// This converts ARM64 relative addresses in the BL and ADRP immediates /// to absolute values to increase redundancy of ARM64 code. /// /// Converting B or ADR instructions was also tested but it's not useful. 
/// A majority of the jumps for the B instruction are very small (+/- 0xFF). /// These are typical for loops and if-statements. Encoding them to their /// absolute address reduces redundancy since many of the small relative /// jump values are repeated, but very few of the absolute addresses are. // // Authors: Lasse Collin // Jia Tan +// Igor Pavlov // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" static size_t arm64_code(void *simple lzma_attribute((__unused__)), uint32_t now_pos, bool is_encoder, uint8_t *buffer, size_t size) { size_t i; // Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower // with auto-vectorization that is enabled by default with -O2. // Such vectorization bloat happens with -O2 when targeting ARM64 too // but performance hasn't been tested. #ifdef __clang__ # pragma clang loop vectorize(disable) #endif for (i = 0; i + 4 <= size; i += 4) { uint32_t pc = (uint32_t)(now_pos + i); uint32_t instr = read32le(buffer + i); if ((instr >> 26) == 0x25) { // BL instruction: // The full 26-bit immediate is converted. // The range is +/-128 MiB. // // Using the full range is helps quite a lot with // big executables. Smaller range would reduce false // positives in non-code sections of the input though // so this is a compromise that slightly favors big // files. With the full range only six bits of the 32 // need to match to trigger a conversion. const uint32_t src = instr; instr = 0x94000000; pc >>= 2; if (!is_encoder) pc = 0U - pc; instr |= (src + pc) & 0x03FFFFFF; write32le(buffer + i, instr); } else if ((instr & 0x9F000000) == 0x90000000) { // ADRP instruction: // Only values in the range +/-512 MiB are converted. // // Using less than the full +/-4 GiB range reduces // false positives on non-code sections of the input // while being excellent for executables up to 512 MiB. 
// The positive effect of ADRP conversion is smaller // than that of BL but it also doesn't hurt so much in // non-code sections of input because, with +/-512 MiB // range, nine bits of 32 need to match to trigger a // conversion (two 10-bit match choices = 9 bits). const uint32_t src = ((instr >> 29) & 3) | ((instr >> 3) & 0x001FFFFC); // With the addition only one branch is needed to // check the +/- range. This is usually false when // processing ARM64 code so branch prediction will // handle it well in terms of performance. // //if ((src & 0x001E0000) != 0 // && (src & 0x001E0000) != 0x001E0000) if ((src + 0x00020000) & 0x001C0000) continue; instr &= 0x9000001F; pc >>= 12; if (!is_encoder) pc = 0U - pc; const uint32_t dest = src + pc; instr |= (dest & 3) << 29; instr |= (dest & 0x0003FFFC) << 3; instr |= (0U - (dest & 0x00020000)) & 0x00E00000; write32le(buffer + i, instr); } } return i; } static lzma_ret arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, bool is_encoder) { return lzma_simple_coder_init(next, allocator, filters, &arm64_code, 0, 4, 4, is_encoder); } #ifdef HAVE_ENCODER_ARM64 extern lzma_ret lzma_simple_arm64_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { return arm64_coder_init(next, allocator, filters, true); } #endif #ifdef HAVE_DECODER_ARM64 extern lzma_ret lzma_simple_arm64_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { return arm64_coder_init(next, allocator, filters, false); } #endif diff --git a/src/xz/message.c b/src/xz/message.c index 4b5776568dd1..c54ebc5b5800 100644 --- a/src/xz/message.c +++ b/src/xz/message.c @@ -1,1137 +1,1146 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file message.c /// \brief Printing messages // // Author: Lasse Collin // // This file has been put into the public domain. 
// You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include /// Number of the current file static unsigned int files_pos = 0; /// Total number of input files; zero if unknown. static unsigned int files_total; /// Verbosity level static enum message_verbosity verbosity = V_WARNING; /// Filename which we will print with the verbose messages static const char *filename; /// True once the a filename has been printed to stderr as part of progress /// message. If automatic progress updating isn't enabled, this becomes true /// after the first progress message has been printed due to user sending /// SIGINFO, SIGUSR1, or SIGALRM. Once this variable is true, we will print /// an empty line before the next filename to make the output more readable. static bool first_filename_printed = false; /// This is set to true when we have printed the current filename to stderr /// as part of a progress message. This variable is useful only if not /// updating progress automatically: if user sends many SIGINFO, SIGUSR1, or /// SIGALRM signals, we won't print the name of the same file multiple times. static bool current_filename_printed = false; /// True if we should print progress indicator and update it automatically /// if also verbose >= V_VERBOSE. static bool progress_automatic; /// True if message_progress_start() has been called but /// message_progress_end() hasn't been called yet. static bool progress_started = false; /// This is true when a progress message was printed and the cursor is still /// on the same line with the progress message. In that case, a newline has /// to be printed before any error messages. static bool progress_active = false; /// Pointer to lzma_stream used to do the encoding or decoding. 
static lzma_stream *progress_strm;

/// This is true if we are in passthru mode (not actually compressing or
/// decompressing) and thus cannot use lzma_get_progress(progress_strm, ...).
/// That is, we are using coder_passthru() in coder.c.
static bool progress_is_from_passthru;

/// Expected size of the input stream is needed to show completion percentage
/// and estimate remaining time.
static uint64_t expected_in_size;


// Use alarm() and SIGALRM when they are supported. This has two minor
// advantages over the alternative of polling gettimeofday():
//  - It is possible for the user to send SIGINFO, SIGUSR1, or SIGALRM to
//    get intermediate progress information even when --verbose wasn't used
//    or stderr is not a terminal.
//  - alarm() + SIGALRM seems to have slightly less overhead than polling
//    gettimeofday().
#ifdef SIGALRM

/// Zero-terminated list of the signals that request a progress update.
/// message_init() installs progress_signal_handler() for each entry.
const int message_progress_sigs[] = {
	SIGALRM,
#ifdef SIGINFO
	SIGINFO,
#endif
#ifdef SIGUSR1
	SIGUSR1,
#endif
	0
};

/// The signal handler for SIGALRM sets this to true. It is set back to false
/// once the progress message has been updated.
static volatile sig_atomic_t progress_needs_updating = false;

/// Signal handler installed for every signal in message_progress_sigs.
/// It only raises the progress_needs_updating flag; the signal number
/// itself is not used.
static void
progress_signal_handler(int sig lzma_attribute((__unused__)))
{
	progress_needs_updating = true;
	return;
}

#else

/// This is true when progress message printing is wanted. Using the same
/// variable name as above to avoid some ifdefs.
static bool progress_needs_updating = false;

/// Elapsed time when the next progress message update should be done.
static uint64_t progress_next_update;

#endif


/// Decides whether automatic progress updating is possible and, when
/// SIGALRM is available, installs the progress signal handlers.
extern void
message_init(void)
{
	// If --verbose is used, we use a progress indicator if and only
	// if stderr is a terminal. If stderr is not a terminal, we print
	// verbose information only after finishing the file. As a special
	// exception, even if --verbose was not used, user can send SIGALRM
	// to make us print progress information once without automatic
	// updating.
	progress_automatic = isatty(STDERR_FILENO);

	// Commented out because COLUMNS is rarely exported to environment.
	// Most users have at least 80 columns anyway, let's think something
	// fancy here if enough people complain.
/*
	if (progress_automatic) {
		// stderr is a terminal. Check the COLUMNS environment
		// variable to see if the terminal is wide enough. If COLUMNS
		// doesn't exist or it has some unparsable value, we assume
		// that the terminal is wide enough.
		const char *columns_str = getenv("COLUMNS");
		if (columns_str != NULL) {
			char *endptr;
			const long columns = strtol(columns_str, &endptr, 10);
			if (*endptr != '\0' || columns < 80)
				progress_automatic = false;
		}
	}
*/

#ifdef SIGALRM
	// Establish the signal handlers which set a flag to tell us that
	// progress info should be updated.
	struct sigaction sa;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = 0;
	sa.sa_handler = &progress_signal_handler;

	// The list is terminated by a zero entry. A failing sigaction()
	// is reported via message_signal_handler().
	for (size_t i = 0; message_progress_sigs[i] != 0; ++i)
		if (sigaction(message_progress_sigs[i], &sa, NULL))
			message_signal_handler();
#endif

	return;
}


/// Raises the verbosity by one step unless it is already V_DEBUG.
extern void
message_verbosity_increase(void)
{
	if (verbosity < V_DEBUG)
		++verbosity;

	return;
}


/// Lowers the verbosity by one step unless it is already V_SILENT.
extern void
message_verbosity_decrease(void)
{
	if (verbosity > V_SILENT)
		--verbosity;

	return;
}


/// Returns the current verbosity level.
extern enum message_verbosity
message_verbosity_get(void)
{
	return verbosity;
}


/// Stores the total number of input files; print_filename() treats
/// zero as "unknown".
extern void
message_set_files(unsigned int files)
{
	files_total = files;
	return;
}


/// Prints the name of the current file if it hasn't been printed already,
/// except if we are processing exactly one stream from stdin to stdout.
/// I think it looks nicer to not print "(stdin)" when --verbose is used
/// in a pipe and no other files are processed.
static void
print_filename(void)
{
	if (!opt_robot && (files_total != 1 || filename != stdin_filename)) {
		signals_block();

		// In list mode the name goes to stdout, otherwise to stderr.
		FILE *file = opt_mode == MODE_LIST ? stdout : stderr;

		// If a file was already processed, put an empty line
		// before the next filename to improve readability.
		if (first_filename_printed)
			fputc('\n', file);

		first_filename_printed = true;
		current_filename_printed = true;

		// If we don't know how many files there will be due
		// to usage of --files or --files0.
		if (files_total == 0)
			fprintf(file, "%s (%u)\n", filename,
					files_pos);
		else
			fprintf(file, "%s (%u/%u)\n", filename,
					files_pos, files_total);

		signals_unblock();
	}

	return;
}


/// Records src_name as the current file and increments the file counter.
/// The name is printed right away only when verbose output is enabled and
/// either automatic progress updating or list mode is in use; otherwise
/// it is merely marked as not yet printed.
extern void
message_filename(const char *src_name)
{
	// Start numbering the files starting from one.
	++files_pos;
	filename = src_name;

	if (verbosity >= V_VERBOSE
			&& (progress_automatic || opt_mode == MODE_LIST))
		print_filename();
	else
		current_filename_printed = false;

	return;
}


/// Stores the state needed for progress reporting of one file and, if the
/// automatic progress indicator is in use, schedules the first update.
extern void
message_progress_start(lzma_stream *strm, bool is_passthru, uint64_t in_size)
{
	// Store the pointer to the lzma_stream used to do the coding.
	// It is needed to find out the position in the stream.
	progress_strm = strm;
	progress_is_from_passthru = is_passthru;

	// Store the expected size of the file. If we aren't printing any
	// statistics, then it will be unused. But since it is possible
	// that the user sends us a signal to show statistics, we need
	// to have it available anyway.
	expected_in_size = in_size;

	// Indicate that progress info may need to be printed before
	// printing error messages.
	progress_started = true;

	// If the progress indicator is wanted, schedule its first update.
	if (verbosity >= V_VERBOSE && progress_automatic) {
		// Start the timer to display the first progress message
		// after one second. An alternative would be to show the
		// first message almost immediately, but delaying by one
		// second looks better to me, since extremely early
		// progress info is pretty much useless.
#ifdef SIGALRM
		// First disable a possibly existing alarm.
		alarm(0);
		progress_needs_updating = false;
		alarm(1);
#else
		// Without SIGALRM the elapsed time is polled; the first
		// update is due at 1000 ms.
		progress_needs_updating = true;
		progress_next_update = 1000;
#endif
	}

	return;
}


/// Make the string indicating completion percentage.
static const char * progress_percentage(uint64_t in_pos) { // If the size of the input file is unknown or the size told us is // clearly wrong since we have processed more data than the alleged // size of the file, show a static string indicating that we have // no idea of the completion percentage. if (expected_in_size == 0 || in_pos > expected_in_size) return "--- %"; // Never show 100.0 % before we actually are finished. double percentage = (double)(in_pos) / (double)(expected_in_size) * 99.9; // Use big enough buffer to hold e.g. a multibyte decimal point. static char buf[16]; snprintf(buf, sizeof(buf), "%.1f %%", percentage); return buf; } /// Make the string containing the amount of input processed, amount of /// output produced, and the compression ratio. static const char * progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) { // Use big enough buffer to hold e.g. a multibyte thousand separators. static char buf[128]; char *pos = buf; size_t left = sizeof(buf); // Print the sizes. If this the final message, use more reasonable // units than MiB if the file was small. const enum nicestr_unit unit_min = final ? NICESTR_B : NICESTR_MIB; my_snprintf(&pos, &left, "%s / %s", uint64_to_nicestr(compressed_pos, unit_min, NICESTR_TIB, false, 0), uint64_to_nicestr(uncompressed_pos, unit_min, NICESTR_TIB, false, 1)); // Avoid division by zero. If we cannot calculate the ratio, set // it to some nice number greater than 10.0 so that it gets caught // in the next if-clause. const double ratio = uncompressed_pos > 0 ? (double)(compressed_pos) / (double)(uncompressed_pos) : 16.0; // If the ratio is very bad, just indicate that it is greater than // 9.999. This way the length of the ratio field stays fixed. if (ratio > 9.999) snprintf(pos, left, " > %.3f", 9.999); else snprintf(pos, left, " = %.3f", ratio); return buf; } /// Make the string containing the processing speed of uncompressed data. 
static const char * progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) { // Don't print the speed immediately, since the early values look // somewhat random. if (elapsed < 3000) return ""; // The first character of KiB/s, MiB/s, or GiB/s: static const char unit[] = { 'K', 'M', 'G' }; size_t unit_index = 0; // Calculate the speed as KiB/s. double speed = (double)(uncompressed_pos) / ((double)(elapsed) * (1024.0 / 1000.0)); // Adjust the unit of the speed if needed. while (speed > 999.0) { speed /= 1024.0; if (++unit_index == ARRAY_SIZE(unit)) return ""; // Way too fast ;-) } // Use decimal point only if the number is small. Examples: // - 0.1 KiB/s // - 9.9 KiB/s // - 99 KiB/s // - 999 KiB/s // Use big enough buffer to hold e.g. a multibyte decimal point. static char buf[16]; snprintf(buf, sizeof(buf), "%.*f %ciB/s", speed > 9.9 ? 0 : 1, speed, unit[unit_index]); return buf; } /// Make a string indicating elapsed time. The format is either /// M:SS or H:MM:SS depending on if the time is an hour or more. static const char * progress_time(uint64_t mseconds) { // 9999 hours = 416 days static char buf[sizeof("9999:59:59")]; // 32-bit variable is enough for elapsed time (136 years). uint32_t seconds = (uint32_t)(mseconds / 1000); // Don't show anything if the time is zero or ridiculously big. if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59) return ""; uint32_t minutes = seconds / 60; seconds %= 60; if (minutes >= 60) { const uint32_t hours = minutes / 60; minutes %= 60; snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, hours, minutes, seconds); } else { snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, minutes, seconds); } return buf; } /// Return a string containing estimated remaining time when /// reasonably possible. static const char * progress_remaining(uint64_t in_pos, uint64_t elapsed) { // Don't show the estimated remaining time when it wouldn't // make sense: // - Input size is unknown. 
// - Input has grown bigger since we started (de)compressing. // - We haven't processed much data yet, so estimate would be // too inaccurate. // - Only a few seconds has passed since we started (de)compressing, // so estimate would be too inaccurate. if (expected_in_size == 0 || in_pos > expected_in_size || in_pos < (UINT64_C(1) << 19) || elapsed < 8000) return ""; // Calculate the estimate. Don't give an estimate of zero seconds, // since it is possible that all the input has been already passed // to the library, but there is still quite a bit of output pending. uint32_t remaining = (uint32_t)((double)(expected_in_size - in_pos) * ((double)(elapsed) / 1000.0) / (double)(in_pos)); if (remaining < 1) remaining = 1; static char buf[sizeof("9 h 55 min")]; // Select appropriate precision for the estimated remaining time. if (remaining <= 10) { // A maximum of 10 seconds remaining. // Show the number of seconds as is. snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); } else if (remaining <= 50) { // A maximum of 50 seconds remaining. // Round up to the next multiple of five seconds. remaining = (remaining + 4) / 5 * 5; snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); } else if (remaining <= 590) { // A maximum of 9 minutes and 50 seconds remaining. // Round up to the next multiple of ten seconds. remaining = (remaining + 9) / 10 * 10; snprintf(buf, sizeof(buf), "%" PRIu32 " min %" PRIu32 " s", remaining / 60, remaining % 60); } else if (remaining <= 59 * 60) { // A maximum of 59 minutes remaining. // Round up to the next multiple of a minute. remaining = (remaining + 59) / 60; snprintf(buf, sizeof(buf), "%" PRIu32 " min", remaining); } else if (remaining <= 9 * 3600 + 50 * 60) { // A maximum of 9 hours and 50 minutes left. // Round up to the next multiple of ten minutes. 
remaining = (remaining + 599) / 600 * 10; snprintf(buf, sizeof(buf), "%" PRIu32 " h %" PRIu32 " min", remaining / 60, remaining % 60); } else if (remaining <= 23 * 3600) { // A maximum of 23 hours remaining. // Round up to the next multiple of an hour. remaining = (remaining + 3599) / 3600; snprintf(buf, sizeof(buf), "%" PRIu32 " h", remaining); } else if (remaining <= 9 * 24 * 3600 + 23 * 3600) { // A maximum of 9 days and 23 hours remaining. // Round up to the next multiple of an hour. remaining = (remaining + 3599) / 3600; snprintf(buf, sizeof(buf), "%" PRIu32 " d %" PRIu32 " h", remaining / 24, remaining % 24); } else if (remaining <= 999 * 24 * 3600) { // A maximum of 999 days remaining. ;-) // Round up to the next multiple of a day. remaining = (remaining + 24 * 3600 - 1) / (24 * 3600); snprintf(buf, sizeof(buf), "%" PRIu32 " d", remaining); } else { // The estimated remaining time is too big. Don't show it. return ""; } return buf; } /// Get how much uncompressed and compressed data has been processed. static void progress_pos(uint64_t *in_pos, uint64_t *compressed_pos, uint64_t *uncompressed_pos) { uint64_t out_pos; if (progress_is_from_passthru) { // In passthru mode the progress info is in total_in/out but // the *progress_strm itself isn't initialized and thus we // cannot use lzma_get_progress(). *in_pos = progress_strm->total_in; out_pos = progress_strm->total_out; } else { lzma_get_progress(progress_strm, in_pos, &out_pos); } // It cannot have processed more input than it has been given. assert(*in_pos <= progress_strm->total_in); // It cannot have produced more output than it claims to have ready. assert(out_pos >= progress_strm->total_out); if (opt_mode == MODE_COMPRESS) { *compressed_pos = out_pos; *uncompressed_pos = *in_pos; } else { *compressed_pos = *in_pos; *uncompressed_pos = out_pos; } return; } extern void message_progress_update(void) { if (!progress_needs_updating) return; // Calculate how long we have been processing this file. 
const uint64_t elapsed = mytime_get_elapsed(); #ifndef SIGALRM if (progress_next_update > elapsed) return; progress_next_update = elapsed + 1000; #endif // Get our current position in the stream. uint64_t in_pos; uint64_t compressed_pos; uint64_t uncompressed_pos; progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); // Block signals so that fprintf() doesn't get interrupted. signals_block(); // Print the filename if it hasn't been printed yet. if (!current_filename_printed) print_filename(); // Print the actual progress message. The idea is that there is at // least three spaces between the fields in typical situations, but // even in rare situations there is at least one space. const char *cols[5] = { progress_percentage(in_pos), progress_sizes(compressed_pos, uncompressed_pos, false), progress_speed(uncompressed_pos, elapsed), progress_time(elapsed), progress_remaining(in_pos, elapsed), }; fprintf(stderr, "\r %*s %*s %*s %10s %10s\r", tuklib_mbstr_fw(cols[0], 6), cols[0], tuklib_mbstr_fw(cols[1], 35), cols[1], tuklib_mbstr_fw(cols[2], 9), cols[2], cols[3], cols[4]); #ifdef SIGALRM // Updating the progress info was finished. Reset // progress_needs_updating to wait for the next SIGALRM. // // NOTE: This has to be done before alarm(1) or with (very) bad // luck we could be setting this to false after the alarm has already // been triggered. progress_needs_updating = false; if (verbosity >= V_VERBOSE && progress_automatic) { // Mark that the progress indicator is active, so if an error // occurs, the error message gets printed cleanly. progress_active = true; // Restart the timer so that progress_needs_updating gets // set to true after about one second. alarm(1); } else { // The progress message was printed because user had sent us // SIGALRM. In this case, each progress message is printed // on its own line. fputc('\n', stderr); } #else // When SIGALRM isn't supported and we get here, it's always due to // automatic progress update. 
We set progress_active here too like // described above. assert(verbosity >= V_VERBOSE); assert(progress_automatic); progress_active = true; #endif signals_unblock(); return; } static void progress_flush(bool finished) { if (!progress_started || verbosity < V_VERBOSE) return; uint64_t in_pos; uint64_t compressed_pos; uint64_t uncompressed_pos; progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); // Avoid printing intermediate progress info if some error occurs // in the beginning of the stream. (If something goes wrong later in // the stream, it is sometimes useful to tell the user where the // error approximately occurred, especially if the error occurs // after a time-consuming operation.) if (!finished && !progress_active && (compressed_pos == 0 || uncompressed_pos == 0)) return; progress_active = false; const uint64_t elapsed = mytime_get_elapsed(); signals_block(); // When using the auto-updating progress indicator, the final // statistics are printed in the same format as the progress // indicator itself. if (progress_automatic) { const char *cols[5] = { finished ? "100 %" : progress_percentage(in_pos), progress_sizes(compressed_pos, uncompressed_pos, true), progress_speed(uncompressed_pos, elapsed), progress_time(elapsed), finished ? "" : progress_remaining(in_pos, elapsed), }; fprintf(stderr, "\r %*s %*s %*s %10s %10s\n", tuklib_mbstr_fw(cols[0], 6), cols[0], tuklib_mbstr_fw(cols[1], 35), cols[1], tuklib_mbstr_fw(cols[2], 9), cols[2], cols[3], cols[4]); } else { // The filename is always printed. fprintf(stderr, "%s: ", filename); // Percentage is printed only if we didn't finish yet. if (!finished) { // Don't print the percentage when it isn't known // (starts with a dash). const char *percentage = progress_percentage(in_pos); if (percentage[0] != '-') fprintf(stderr, "%s, ", percentage); } // Size information is always printed. 
fprintf(stderr, "%s", progress_sizes( compressed_pos, uncompressed_pos, true)); // The speed and elapsed time aren't always shown. const char *speed = progress_speed(uncompressed_pos, elapsed); if (speed[0] != '\0') fprintf(stderr, ", %s", speed); const char *elapsed_str = progress_time(elapsed); if (elapsed_str[0] != '\0') fprintf(stderr, ", %s", elapsed_str); fputc('\n', stderr); } signals_unblock(); return; } extern void message_progress_end(bool success) { assert(progress_started); progress_flush(success); progress_started = false; return; } static void vmessage(enum message_verbosity v, const char *fmt, va_list ap) { if (v <= verbosity) { signals_block(); progress_flush(false); // TRANSLATORS: This is the program name in the beginning // of the line in messages. Usually it becomes "xz: ". // This is a translatable string because French needs // a space before a colon. fprintf(stderr, _("%s: "), progname); + +#ifdef __clang__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif vfprintf(stderr, fmt, ap); +#ifdef __clang__ +# pragma GCC diagnostic pop +#endif + fputc('\n', stderr); signals_unblock(); } return; } extern void message(enum message_verbosity v, const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(v, fmt, ap); va_end(ap); return; } extern void message_warning(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(V_WARNING, fmt, ap); va_end(ap); set_exit_status(E_WARNING); return; } extern void message_error(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(V_ERROR, fmt, ap); va_end(ap); set_exit_status(E_ERROR); return; } extern void message_fatal(const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vmessage(V_ERROR, fmt, ap); va_end(ap); tuklib_exit(E_ERROR, E_ERROR, false); } extern void message_bug(void) { message_fatal(_("Internal error (bug)")); } extern void message_signal_handler(void) { message_fatal(_("Cannot establish signal handlers")); } extern const char * message_strm(lzma_ret code) { switch (code) { case LZMA_NO_CHECK: return _("No integrity check; not verifying file integrity"); case LZMA_UNSUPPORTED_CHECK: return _("Unsupported type of integrity check; " "not verifying file integrity"); case LZMA_MEM_ERROR: return strerror(ENOMEM); case LZMA_MEMLIMIT_ERROR: return _("Memory usage limit reached"); case LZMA_FORMAT_ERROR: return _("File format not recognized"); case LZMA_OPTIONS_ERROR: return _("Unsupported options"); case LZMA_DATA_ERROR: return _("Compressed data is corrupt"); case LZMA_BUF_ERROR: return _("Unexpected end of input"); case LZMA_OK: case LZMA_STREAM_END: case LZMA_GET_CHECK: case LZMA_PROG_ERROR: case LZMA_SEEK_NEEDED: case LZMA_RET_INTERNAL1: case LZMA_RET_INTERNAL2: case LZMA_RET_INTERNAL3: case LZMA_RET_INTERNAL4: case LZMA_RET_INTERNAL5: case LZMA_RET_INTERNAL6: case LZMA_RET_INTERNAL7: case LZMA_RET_INTERNAL8: // Without "default", compiler will warn if new constants // are added to lzma_ret, it is not too easy to forget to // add the new constants to this function. break; } return _("Internal error (bug)"); } extern void message_mem_needed(enum message_verbosity v, uint64_t memusage) { if (v > verbosity) return; // Convert memusage to MiB, rounding up to the next full MiB. // This way the user can always use the displayed usage as // the new memory usage limit. (If we rounded to the nearest, // the user might need to +1 MiB to get high enough limit.) memusage = round_up_to_mib(memusage); uint64_t memlimit = hardware_memlimit_get(opt_mode); // Handle the case when there is no memory usage limit. // This way we don't print a weird message with a huge number. 
if (memlimit == UINT64_MAX) { message(v, _("%s MiB of memory is required. " "The limiter is disabled."), uint64_to_str(memusage, 0)); return; } // With US-ASCII: // 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1 // But there may be multibyte chars so reserve enough space. char memlimitstr[128]; // Show the memory usage limit as MiB unless it is less than 1 MiB. // This way it's easy to notice errors where one has typed // --memory=123 instead of --memory=123MiB. if (memlimit < (UINT32_C(1) << 20)) { snprintf(memlimitstr, sizeof(memlimitstr), "%s B", uint64_to_str(memlimit, 1)); } else { // Round up just like with memusage. If this function is // called for informational purposes (to just show the // current usage and limit), we should never show that // the usage is higher than the limit, which would give // a false impression that the memory usage limit isn't // properly enforced. snprintf(memlimitstr, sizeof(memlimitstr), "%s MiB", uint64_to_str(round_up_to_mib(memlimit), 1)); } message(v, _("%s MiB of memory is required. The limit is %s."), uint64_to_str(memusage, 0), memlimitstr); return; } extern void message_filters_show(enum message_verbosity v, const lzma_filter *filters) { if (v > verbosity) return; char *buf; const lzma_ret ret = lzma_str_from_filters(&buf, filters, LZMA_STR_ENCODER | LZMA_STR_GETOPT_LONG, NULL); if (ret != LZMA_OK) message_fatal("%s", message_strm(ret)); fprintf(stderr, _("%s: Filter chain: %s\n"), progname, buf); free(buf); return; } extern void message_try_help(void) { // Print this with V_WARNING instead of V_ERROR to prevent it from // showing up when --quiet has been specified. message(V_WARNING, _("Try `%s --help' for more information."), progname); return; } extern void message_version(void) { // It is possible that liblzma version is different than the command // line tool version, so print both. 
if (opt_robot) { printf("XZ_VERSION=%" PRIu32 "\nLIBLZMA_VERSION=%" PRIu32 "\n", LZMA_VERSION, lzma_version_number()); } else { printf("xz (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"); printf("liblzma %s\n", lzma_version_string()); } tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); } extern void message_help(bool long_help) { printf(_("Usage: %s [OPTION]... [FILE]...\n" "Compress or decompress FILEs in the .xz format.\n\n"), progname); // NOTE: The short help doesn't currently have options that // take arguments. if (long_help) puts(_("Mandatory arguments to long options are mandatory " "for short options too.\n")); if (long_help) puts(_(" Operation mode:\n")); puts(_( " -z, --compress force compression\n" " -d, --decompress force decompression\n" " -t, --test test compressed file integrity\n" " -l, --list list information about .xz files")); if (long_help) puts(_("\n Operation modifiers:\n")); puts(_( " -k, --keep keep (don't delete) input files\n" " -f, --force force overwrite of output file and (de)compress links\n" " -c, --stdout write to standard output and don't delete input files")); // NOTE: --to-stdout isn't included above because it's not // the recommended spelling. It was copied from gzip but other // compressors with gzip-like syntax don't support it. 
if (long_help) { puts(_( " --single-stream decompress only the first stream, and silently\n" " ignore possible remaining input data")); puts(_( " --no-sparse do not create sparse files when decompressing\n" " -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" " --files[=FILE] read filenames to process from FILE; if FILE is\n" " omitted, filenames are read from the standard input;\n" " filenames must be terminated with the newline character\n" " --files0[=FILE] like --files but use the null character as terminator")); } if (long_help) { puts(_("\n Basic file format and compression options:\n")); puts(_( " -F, --format=FMT file format to encode or decode; possible values are\n" " `auto' (default), `xz', `lzma', `lzip', and `raw'\n" " -C, --check=CHECK integrity check type: `none' (use with caution),\n" " `crc32', `crc64' (default), or `sha256'")); puts(_( " --ignore-check don't verify the integrity check when decompressing")); } puts(_( " -0 ... -9 compression preset; default is 6; take compressor *and*\n" " decompressor memory usage into account before using 7-9!")); puts(_( " -e, --extreme try to improve compression ratio by using more CPU time;\n" " does not affect decompressor memory requirements")); puts(_( " -T, --threads=NUM use at most NUM threads; the default is 1; set to 0\n" " to use as many threads as there are processor cores")); if (long_help) { puts(_( " --block-size=SIZE\n" " start a new .xz block after every SIZE bytes of input;\n" " use this to set the block size for threaded compression")); puts(_( " --block-list=SIZES\n" " start a new .xz block after the given comma-separated\n" " intervals of uncompressed data")); puts(_( " --flush-timeout=TIMEOUT\n" " when compressing, if more than TIMEOUT milliseconds has\n" " passed since the previous flush and reading more input\n" " would block, all pending data is flushed out" )); puts(_( // xgettext:no-c-format " --memlimit-compress=LIMIT\n" " --memlimit-decompress=LIMIT\n" " 
--memlimit-mt-decompress=LIMIT\n" " -M, --memlimit=LIMIT\n" " set memory usage limit for compression, decompression,\n" " threaded decompression, or all of these; LIMIT is in\n" " bytes, % of RAM, or 0 for defaults")); puts(_( " --no-adjust if compression settings exceed the memory usage limit,\n" " give an error instead of adjusting the settings downwards")); } if (long_help) { puts(_( "\n Custom filter chain for compression (alternative for using presets):")); #if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) // TRANSLATORS: The word "literal" in "literal context bits" // means how many "context bits" to use when encoding // literals. A literal is a single 8-bit byte. It doesn't // mean "literally" here. puts(_( "\n" " --lzma1[=OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" " --lzma2[=OPTS] more of the following options (valid values; default):\n" " preset=PRE reset options to a preset (0-9[e])\n" " dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" " lc=NUM number of literal context bits (0-4; 3)\n" " lp=NUM number of literal position bits (0-4; 0)\n" " pb=NUM number of position bits (0-4; 2)\n" " mode=MODE compression mode (fast, normal; normal)\n" " nice=NUM nice length of a match (2-273; 64)\n" " mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" " depth=NUM maximum search depth; 0=automatic (default)")); #endif puts(_( "\n" " --x86[=OPTS] x86 BCJ filter (32-bit and 64-bit)\n" " --arm[=OPTS] ARM BCJ filter\n" " --armthumb[=OPTS] ARM-Thumb BCJ filter\n" " --arm64[=OPTS] ARM64 BCJ filter\n" " --powerpc[=OPTS] PowerPC BCJ filter (big endian only)\n" " --ia64[=OPTS] IA-64 (Itanium) BCJ filter\n" " --sparc[=OPTS] SPARC BCJ filter\n" " Valid OPTS for all BCJ filters:\n" " start=NUM start offset for conversions (default=0)")); #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) puts(_( "\n" " --delta[=OPTS] Delta filter; valid OPTS (valid values; 
default):\n" " dist=NUM distance between bytes being subtracted\n" " from each other (1-256; 1)")); #endif } if (long_help) puts(_("\n Other options:\n")); puts(_( " -q, --quiet suppress warnings; specify twice to suppress errors too\n" " -v, --verbose be verbose; specify twice for even more verbose")); if (long_help) { puts(_( " -Q, --no-warn make warnings not affect the exit status")); puts(_( " --robot use machine-parsable messages (useful for scripts)")); puts(""); puts(_( " --info-memory display the total amount of RAM and the currently active\n" " memory usage limits, and exit")); puts(_( " -h, --help display the short help (lists only the basic options)\n" " -H, --long-help display this long help and exit")); } else { puts(_( " -h, --help display this short help and exit\n" " -H, --long-help display the long help (lists also the advanced options)")); } puts(_( " -V, --version display the version number and exit")); puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); // TRANSLATORS: This message indicates the bug reporting address // for this package. Please add _another line_ saying // "Report translation bugs to <...>\n" with the email or WWW // address for translation bugs. Thanks. printf(_("Report bugs to <%s> (in English or Finnish).\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); #if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE puts(_( "THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE.")); #endif tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); } diff --git a/src/xz/mytime.c b/src/xz/mytime.c index 70444001bdd7..7e8a074961f4 100644 --- a/src/xz/mytime.c +++ b/src/xz/mytime.c @@ -1,85 +1,86 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file mytime.c /// \brief Time handling functions // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "private.h" -#if !(defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC) +#if defined(HAVE_CLOCK_GETTIME) && defined(HAVE_CLOCK_MONOTONIC) +# include +#else # include #endif uint64_t opt_flush_timeout = 0; static uint64_t start_time; static uint64_t next_flush; /// \brief Get the current time as milliseconds /// /// It's relative to some point but not necessarily to the UNIX Epoch. static uint64_t mytime_now(void) { - // NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1. -#if defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC +#if defined(HAVE_CLOCK_GETTIME) && defined(HAVE_CLOCK_MONOTONIC) // If CLOCK_MONOTONIC was available at compile time but for some // reason isn't at runtime, fallback to CLOCK_REALTIME which // according to POSIX is mandatory for all implementations. static clockid_t clk_id = CLOCK_MONOTONIC; struct timespec tv; while (clock_gettime(clk_id, &tv)) clk_id = CLOCK_REALTIME; return (uint64_t)tv.tv_sec * 1000 + (uint64_t)(tv.tv_nsec / 1000000); #else struct timeval tv; gettimeofday(&tv, NULL); return (uint64_t)tv.tv_sec * 1000 + (uint64_t)(tv.tv_usec / 1000); #endif } extern void mytime_set_start_time(void) { start_time = mytime_now(); return; } extern uint64_t mytime_get_elapsed(void) { return mytime_now() - start_time; } extern void mytime_set_flush_time(void) { next_flush = mytime_now() + opt_flush_timeout; return; } extern int mytime_get_flush_timeout(void) { if (opt_flush_timeout == 0 || opt_mode != MODE_COMPRESS) return -1; const uint64_t now = mytime_now(); if (now >= next_flush) return 0; const uint64_t remaining = next_flush - now; return remaining > INT_MAX ? 
INT_MAX : (int)remaining; } diff --git a/src/xz/suffix.c b/src/xz/suffix.c index 6bb35d4229f6..09add3817ebd 100644 --- a/src/xz/suffix.c +++ b/src/xz/suffix.c @@ -1,408 +1,411 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file suffix.c /// \brief Checks filename suffix and creates the destination filename // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #ifdef __DJGPP__ # include #endif // For case-insensitive filename suffix on case-insensitive systems #if defined(TUKLIB_DOSLIKE) || defined(__VMS) +# ifdef HAVE_STRINGS_H +# include +# endif # define strcmp strcasecmp #endif static char *custom_suffix = NULL; /// \brief Test if the char is a directory separator static bool is_dir_sep(char c) { #ifdef TUKLIB_DOSLIKE return c == '/' || c == '\\' || c == ':'; #else return c == '/'; #endif } /// \brief Test if the string contains a directory separator static bool has_dir_sep(const char *str) { #ifdef TUKLIB_DOSLIKE return strpbrk(str, "/\\:") != NULL; #else return strchr(str, '/') != NULL; #endif } #ifdef __DJGPP__ /// \brief Test for special suffix used for 8.3 short filenames (SFN) /// /// \return If str matches *.?- or *.??-, true is returned. Otherwise /// false is returned. static bool has_sfn_suffix(const char *str, size_t len) { if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.' 
&& !is_dir_sep(str[len - 2])) { // *.?- if (str[len - 3] == '.') return !is_dir_sep(str[len - 4]); // *.??- if (len >= 5 && !is_dir_sep(str[len - 3]) && str[len - 4] == '.') return !is_dir_sep(str[len - 5]); } return false; } #endif /// \brief Checks if src_name has given compressed_suffix /// /// \param suffix Filename suffix to look for /// \param src_name Input filename /// \param src_len strlen(src_name) /// /// \return If src_name has the suffix, src_len - strlen(suffix) is /// returned. It's always a positive integer. Otherwise zero /// is returned. static size_t test_suffix(const char *suffix, const char *src_name, size_t src_len) { const size_t suffix_len = strlen(suffix); // The filename must have at least one character in addition to // the suffix. src_name may contain path to the filename, so we // need to check for directory separator too. if (src_len <= suffix_len || is_dir_sep(src_name[src_len - suffix_len - 1])) return 0; if (strcmp(suffix, src_name + src_len - suffix_len) == 0) return src_len - suffix_len; return 0; } /// \brief Removes the filename suffix of the compressed file /// /// \return Name of the uncompressed file, or NULL if file has unknown /// suffix. static char * uncompressed_name(const char *src_name, const size_t src_len) { static const struct { const char *compressed; const char *uncompressed; } suffixes[] = { { ".xz", "" }, { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare. { ".lzma", "" }, #ifdef __DJGPP__ { ".lzm", "" }, #endif { ".tlz", ".tar" }, // Both .tar.lzma and .tar.lz #ifdef HAVE_LZIP_DECODER { ".lz", "" }, #endif }; const char *new_suffix = ""; size_t new_len = 0; if (opt_format == FORMAT_RAW) { // Don't check for known suffixes when --format=raw was used. 
if (custom_suffix == NULL) { message_error(_("%s: With --format=raw, " "--suffix=.SUF is required unless " "writing to stdout"), src_name); return NULL; } } else { for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) { new_len = test_suffix(suffixes[i].compressed, src_name, src_len); if (new_len != 0) { new_suffix = suffixes[i].uncompressed; break; } } #ifdef __DJGPP__ // Support also *.?- -> *.? and *.??- -> *.?? on DOS. // This is done also when long filenames are available // to keep it easy to decompress files created when // long filename support wasn't available. if (new_len == 0 && has_sfn_suffix(src_name, src_len)) { new_suffix = ""; new_len = src_len - 1; } #endif } if (new_len == 0 && custom_suffix != NULL) new_len = test_suffix(custom_suffix, src_name, src_len); if (new_len == 0) { message_warning(_("%s: Filename has an unknown suffix, " "skipping"), src_name); return NULL; } const size_t new_suffix_len = strlen(new_suffix); char *dest_name = xmalloc(new_len + new_suffix_len + 1); memcpy(dest_name, src_name, new_len); memcpy(dest_name + new_len, new_suffix, new_suffix_len); dest_name[new_len + new_suffix_len] = '\0'; return dest_name; } /// This message is needed in multiple places in compressed_name(), /// so the message has been put into its own function. static void msg_suffix(const char *src_name, const char *suffix) { message_warning(_("%s: File already has `%s' suffix, skipping"), src_name, suffix); return; } /// \brief Appends suffix to src_name /// /// In contrast to uncompressed_name(), we check only suffixes that are valid /// for the specified file format. static char * compressed_name(const char *src_name, size_t src_len) { // The order of these must match the order in args.h. static const char *const all_suffixes[][4] = { { ".xz", ".txz", NULL }, { ".lzma", #ifdef __DJGPP__ ".lzm", #endif ".tlz", NULL #ifdef HAVE_LZIP_DECODER // This is needed to keep the table indexing in sync with // enum format_type from coder.h. 
}, { /* ".lz", */ NULL #endif }, { // --format=raw requires specifying the suffix // manually or using stdout. NULL } }; // args.c ensures these. assert(opt_format != FORMAT_AUTO); #ifdef HAVE_LZIP_DECODER assert(opt_format != FORMAT_LZIP); #endif const size_t format = opt_format - 1; const char *const *suffixes = all_suffixes[format]; // Look for known filename suffixes and refuse to compress them. for (size_t i = 0; suffixes[i] != NULL; ++i) { if (test_suffix(suffixes[i], src_name, src_len) != 0) { msg_suffix(src_name, suffixes[i]); return NULL; } } #ifdef __DJGPP__ // Recognize also the special suffix that is used when long // filename (LFN) support isn't available. This suffix is // recognized on LFN systems too. if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) { msg_suffix(src_name, "-"); return NULL; } #endif if (custom_suffix != NULL) { if (test_suffix(custom_suffix, src_name, src_len) != 0) { msg_suffix(src_name, custom_suffix); return NULL; } } // TODO: Hmm, maybe it would be better to validate this in args.c, // since the suffix handling when decoding is weird now. if (opt_format == FORMAT_RAW && custom_suffix == NULL) { message_error(_("%s: With --format=raw, " "--suffix=.SUF is required unless " "writing to stdout"), src_name); return NULL; } const char *suffix = custom_suffix != NULL ? custom_suffix : suffixes[0]; size_t suffix_len = strlen(suffix); #ifdef __DJGPP__ if (!_use_lfn(src_name)) { // Long filename (LFN) support isn't available and we are // limited to 8.3 short filenames (SFN). // // Look for suffix separator from the filename, and make sure // that it is in the filename, not in a directory name. const char *sufsep = strrchr(src_name, '.'); if (sufsep == NULL || sufsep[1] == '\0' || has_dir_sep(sufsep)) { // src_name has no filename extension. // // Examples: // xz foo -> foo.xz // xz -F lzma foo -> foo.lzm // xz -S x foo -> foox // xz -S x foo. -> foo.x // xz -S x.y foo -> foox.y // xz -S .x foo -> foo.x // xz -S .x foo. 
-> foo.x // // Avoid double dots: if (sufsep != NULL && sufsep[1] == '\0' && suffix[0] == '.') --src_len; } else if (custom_suffix == NULL && strcasecmp(sufsep, ".tar") == 0) { // ".tar" is handled specially. // // Examples: // xz foo.tar -> foo.txz // xz -F lzma foo.tar -> foo.tlz static const char *const tar_suffixes[] = { ".txz", // .tar.xz ".tlz", // .tar.lzma /* ".tlz", // .tar.lz */ }; suffix = tar_suffixes[format]; suffix_len = 4; src_len -= 4; } else { if (custom_suffix == NULL && opt_format == FORMAT_XZ) { // Instead of the .xz suffix, use a single // character at the end of the filename // extension. This is to minimize name // conflicts when compressing multiple files // with the same basename. E.g. foo.txt and // foo.exe become foo.tx- and foo.ex-. Dash // is rare as the last character of the // filename extension, so it seems to be // quite safe choice and it stands out better // in directory listings than e.g. x. For // comparison, gzip uses z. suffix = "-"; suffix_len = 1; } if (suffix[0] == '.') { // The first character of the suffix is a dot. // Throw away the original filename extension // and replace it with the new suffix. // // Examples: // xz -F lzma foo.txt -> foo.lzm // xz -S .x foo.txt -> foo.x src_len = sufsep - src_name; } else { // The first character of the suffix is not // a dot. Preserve the first 0-2 characters // of the original filename extension. // // Examples: // xz foo.txt -> foo.tx- // xz -S x foo.c -> foo.cx // xz -S ab foo.c -> foo.cab // xz -S ab foo.txt -> foo.tab // xz -S abc foo.txt -> foo.abc // // Truncate the suffix to three chars: if (suffix_len > 3) suffix_len = 3; // If needed, overwrite 1-3 characters. 
if (strlen(sufsep) > 4 - suffix_len) src_len = sufsep - src_name + 4 - suffix_len; } } } #endif char *dest_name = xmalloc(src_len + suffix_len + 1); memcpy(dest_name, src_name, src_len); memcpy(dest_name + src_len, suffix, suffix_len); dest_name[src_len + suffix_len] = '\0'; return dest_name; } extern char * suffix_get_dest_name(const char *src_name) { assert(src_name != NULL); // Length of the name is needed in all cases to locate the end of // the string to compare the suffix, so calculate the length here. const size_t src_len = strlen(src_name); return opt_mode == MODE_COMPRESS ? compressed_name(src_name, src_len) : uncompressed_name(src_name, src_len); } extern void suffix_set(const char *suffix) { // Empty suffix and suffixes having a directory separator are // rejected. Such suffixes would break things later. if (suffix[0] == '\0' || has_dir_sep(suffix)) message_fatal(_("%s: Invalid filename suffix"), suffix); // Replace the old custom_suffix (if any) with the new suffix. free(custom_suffix); custom_suffix = xstrdup(suffix); return; }